Fix crash on calling compute_string_components multiple times in a complex filter query. (#1539)

* Add failing test.

* Fix crash on calling  multiple times in  a complex filter query.

* Rename `compute_result` to `compute_string_components`.
This commit is contained in:
Harpreet Sangar 2024-02-07 12:48:19 +05:30 committed by GitHub
parent 7b616b19fa
commit a94062481f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 25 additions and 21 deletions

View File

@ -322,7 +322,7 @@ public:
Option<bool> init_status();
/// Recursively computes the result of each node and stores the final result in the root node.
void compute_result();
void compute_string_components();
/// Returns a tri-state:
/// 0: id is not valid

View File

@ -429,7 +429,7 @@ void filter_result_iterator_t::next() {
return;
}
// No need to traverse iterator tree if there's only one filter or compute_result() has been called.
// No need to traverse iterator tree if there's only one filter or compute_string_components() has been called.
if (is_filter_result_initialized) {
if (++result_index >= filter_result.count) {
validity = invalid;
@ -629,7 +629,7 @@ void filter_result_iterator_t::init() {
approx_filter_ids_length = std::max(left_it->approx_filter_ids_length, right_it->approx_filter_ids_length);
}
// Rearranging the subtree in hope to reduce computation if/when compute_result() is called.
// Rearranging the subtree in hope to reduce computation if/when compute_string_components() is called.
if (left_it->approx_filter_ids_length > right_it->approx_filter_ids_length) {
std::swap(left_it, right_it);
}
@ -1165,19 +1165,19 @@ void filter_result_iterator_t::init() {
}
if (a_filter.values.size() > 10) {
compute_result();
compute_string_components();
return;
}
if (a_filter.apply_not_equals &&
index->seq_ids->num_ids() - approx_filter_ids_length < string_filter_ids_threshold) {
// Since there are very few matches, and we have to apply not equals, iteration will be inefficient.
compute_result();
compute_string_components();
return;
} else if (a_filter.apply_not_equals) {
all_seq_ids_iter = index->seq_ids->new_iterator();
} else if (approx_filter_ids_length < string_filter_ids_threshold) {
compute_result();
compute_string_components();
return;
}
@ -1191,7 +1191,7 @@ void filter_result_iterator_t::skip_to(uint32_t id, const bool& override_timeout
return;
}
// No need to traverse iterator tree if there's only one filter or compute_result() has been called.
// No need to traverse iterator tree if there's only one filter or compute_string_components() has been called.
if (is_filter_result_initialized) {
ArrayUtils::skip_index_to_id(result_index, filter_result.docs, filter_result.count, id);
@ -1306,7 +1306,7 @@ int filter_result_iterator_t::is_valid(uint32_t id) {
return -1;
}
// No need to traverse iterator tree if there's only one filter or compute_result() has been called.
// No need to traverse iterator tree if there's only one filter or compute_string_components() has been called.
if (is_filter_result_initialized) {
skip_to(id);
return validity ? (seq_id == id ? 1 : 0) : -1;
@ -1436,7 +1436,7 @@ void filter_result_iterator_t::reset(const bool& override_timeout) {
return;
}
// No need to traverse iterator tree if there's only one filter or compute_result() has been called.
// No need to traverse iterator tree if there's only one filter or compute_string_components() has been called.
if (is_filter_result_initialized) {
if (filter_result.count == 0) {
validity = invalid;
@ -1589,7 +1589,7 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
}
if (!is_filter_result_initialized) {
compute_result();
compute_string_components();
}
std::vector<uint32_t> match_indexes;
@ -1755,7 +1755,7 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n,
return get_n_ids(n, result, override_timeout);
}
// This method is only called in Index::search_wildcard after filter_result_iterator_t::compute_result.
// This method is only called in Index::search_wildcard after filter_result_iterator_t::compute_string_components.
if (!is_filter_result_initialized) {
return;
}
@ -1857,7 +1857,7 @@ void filter_result_iterator_t::add_phrase_ids(filter_result_iterator_t*& fit,
fit = root_iterator;
}
void filter_result_iterator_t::compute_result() {
void filter_result_iterator_t::compute_string_components() {
if (filter_node == nullptr) {
validity = invalid;
is_filter_result_initialized = false;
@ -1869,9 +1869,13 @@ void filter_result_iterator_t::compute_result() {
return;
}
if (is_filter_result_initialized) {
return;
}
if (filter_node->isOperator) {
left_it->compute_result();
right_it->compute_result();
left_it->compute_string_components();
right_it->compute_string_components();
if (filter_node->filter_operator == AND) {
filter_result_t::and_filter_results(left_it->filter_result, right_it->filter_result, filter_result);
@ -1901,7 +1905,7 @@ void filter_result_iterator_t::compute_result() {
}
// Only string field filter needs to be evaluated.
if (is_filter_result_initialized || index->search_index.count(filter_node->filter_exp.field_name) == 0) {
if (index->search_index.count(filter_node->filter_exp.field_name) == 0) {
return;
}

View File

@ -1820,7 +1820,7 @@ Option<bool> Index::do_filtering_with_lock(filter_node_t* const filter_tree_root
return filter_init_op;
}
filter_result_iterator.compute_result();
filter_result_iterator.compute_string_components();
if (filter_result_iterator.approx_filter_ids_length == 0) {
return Option(true);
}
@ -1880,7 +1880,7 @@ Option<bool> Index::do_reference_filtering_with_lock(filter_node_t* const filter
return filter_init_op;
}
ref_filter_result_iterator.compute_result();
ref_filter_result_iterator.compute_string_components();
if (ref_filter_result_iterator.approx_filter_ids_length == 0) {
return Option(true);
}
@ -2873,12 +2873,12 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
#ifdef TEST_BUILD
if (filter_result_iterator->approx_filter_ids_length > 20) {
filter_result_iterator->compute_result();
filter_result_iterator->compute_string_components();
}
#else
if (filter_result_iterator->approx_filter_ids_length < 25'000) {
filter_result_iterator->compute_result();
filter_result_iterator->compute_string_components();
}
#endif
@ -5930,7 +5930,7 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
const std::vector<size_t>& geopoint_indices,
const std::string& collection_name) const {
filter_result_iterator->compute_result();
filter_result_iterator->compute_string_components();
auto const& approx_filter_ids_length = filter_result_iterator->approx_filter_ids_length;
uint32_t token_bits = 0;

View File

@ -2319,7 +2319,7 @@ TEST_F(CollectionFilteringTest, ComputeFilterResult) {
ASSERT_EQ(10, results["found"]);
res_op = coll1->search("*", {}, "title: bar",
res_op = coll1->search("*", {}, "title: bar && points:>=10",
{}, {}, {0}, 10, 1, FREQUENCY, {true});
results = res_op.get();