Merge pull request #1044 from happy-san/fix

Fix wildcard search with a geo-filter returning at most 100 results.
This commit is contained in:
Kishore Nallan 2023-06-05 15:54:33 +05:30 committed by GitHub
commit 4c02906528
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 17 additions and 8 deletions

View File

@ -725,7 +725,7 @@ public:
const std::vector<uint32_t>& curated_ids_sorted, const uint32_t* exclude_token_ids,
size_t exclude_token_ids_size, const std::unordered_set<uint32_t>& excluded_group_ids,
uint32_t*& all_result_ids, size_t& all_result_ids_len,
filter_result_iterator_t* const filter_result_iterator, const uint32_t& approx_filter_ids_length,
filter_result_iterator_t* const filter_result_iterator,
const size_t concurrency,
const int* sort_order,
std::array<spp::sparse_hash_map<uint32_t, int64_t>*, 3>& field_values,

View File

@ -571,8 +571,10 @@ void filter_result_iterator_t::init() {
if (filter_node->isOperator) {
if (filter_node->filter_operator == AND) {
and_filter_iterators();
approx_filter_ids_length = std::min(left_it->approx_filter_ids_length, right_it->approx_filter_ids_length);
} else {
or_filter_iterators();
approx_filter_ids_length = std::max(left_it->approx_filter_ids_length, right_it->approx_filter_ids_length);
}
return;
@ -612,6 +614,7 @@ void filter_result_iterator_t::init() {
}
is_filter_result_initialized = true;
approx_filter_ids_length = filter_result.count;
return;
}
@ -635,6 +638,7 @@ void filter_result_iterator_t::init() {
seq_id = filter_result.docs[result_index];
is_filter_result_initialized = true;
approx_filter_ids_length = filter_result.count;
return;
}
@ -681,6 +685,7 @@ void filter_result_iterator_t::init() {
seq_id = filter_result.docs[result_index];
is_filter_result_initialized = true;
approx_filter_ids_length = filter_result.count;
return;
} else if (f.is_float()) {
auto num_tree = index->numerical_index.at(a_filter.field_name);
@ -719,6 +724,7 @@ void filter_result_iterator_t::init() {
seq_id = filter_result.docs[result_index];
is_filter_result_initialized = true;
approx_filter_ids_length = filter_result.count;
return;
} else if (f.is_bool()) {
auto num_tree = index->numerical_index.at(a_filter.field_name);
@ -752,6 +758,7 @@ void filter_result_iterator_t::init() {
seq_id = filter_result.docs[result_index];
is_filter_result_initialized = true;
approx_filter_ids_length = filter_result.count;
return;
} else if (f.is_geopoint()) {
for (uint32_t fi = 0; fi < a_filter.values.size(); fi++) {
@ -895,6 +902,7 @@ void filter_result_iterator_t::init() {
seq_id = filter_result.docs[result_index];
is_filter_result_initialized = true;
approx_filter_ids_length = filter_result.count;
return;
} else if (f.is_string()) {
art_tree* t = index->search_index.at(a_filter.field_name);

View File

@ -1618,7 +1618,9 @@ Option<bool> Index::_approximate_filter_ids(const filter& a_filter,
value_index++;
}
} else if (f.is_geopoint()) {
filter_ids_length = 100;
// Optimistically set a value greater than 0 here. The exact count is determined later, during initialization
// of filter_result_iterator.
filter_ids_length = 1;
} else if (f.is_string()) {
art_tree* t = search_index.at(a_filter.field_name);
@ -2250,7 +2252,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
}
auto filter_result_iterator = new filter_result_iterator_t(collection_name, this, filter_tree_root,
approx_filter_ids_length);
approx_filter_ids_length);
std::unique_ptr<filter_result_iterator_t> filter_iterator_guard(filter_result_iterator);
auto filter_init_op = filter_result_iterator->init_status();
@ -2369,8 +2371,6 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
if (no_filters_provided) {
filter_result_iterator = new filter_result_iterator_t(seq_ids->uncompress(), seq_ids->num_ids());
filter_iterator_guard.reset(filter_result_iterator);
approx_filter_ids_length = filter_result_iterator->approx_filter_ids_length;
}
collate_included_ids({}, included_ids_map, curated_topster, searched_queries);
@ -2483,7 +2483,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
curated_ids, curated_ids_sorted,
excluded_result_ids, excluded_result_ids_size, excluded_group_ids,
all_result_ids, all_result_ids_len,
filter_result_iterator, approx_filter_ids_length, concurrency,
filter_result_iterator, concurrency,
sort_order, field_values, geopoint_indices);
filter_result_iterator->reset();
}
@ -4467,12 +4467,13 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root,
const std::vector<uint32_t>& curated_ids_sorted, const uint32_t* exclude_token_ids,
size_t exclude_token_ids_size, const std::unordered_set<uint32_t>& excluded_group_ids,
uint32_t*& all_result_ids, size_t& all_result_ids_len,
filter_result_iterator_t* const filter_result_iterator, const uint32_t& approx_filter_ids_length,
filter_result_iterator_t* const filter_result_iterator,
const size_t concurrency,
const int* sort_order,
std::array<spp::sparse_hash_map<uint32_t, int64_t>*, 3>& field_values,
const std::vector<size_t>& geopoint_indices) const {
auto const& approx_filter_ids_length = filter_result_iterator->approx_filter_ids_length;
uint32_t token_bits = 0;
const bool check_for_circuit_break = (approx_filter_ids_length > 1000000);

View File

@ -2281,7 +2281,7 @@ TEST_F(CollectionSpecificMoreTest, ApproxFilterMatchCount) {
ASSERT_TRUE(filter_op.ok());
coll->_get_index()->_approximate_filter_ids(filter_tree_root->filter_exp, approx_count);
ASSERT_EQ(approx_count, 100);
ASSERT_EQ(approx_count, 1);
delete filter_tree_root;
filter_op = filter::parse_filter_query("years:>2000 && ((age:<30 && rating:>5) || (age:>50 && rating:<5))",