Merge pull request #1394 from happy-san/filtering_cutoff

Filtering cutoff
This commit is contained in:
Kishore Nallan 2023-12-05 19:05:23 +05:30 committed by GitHub
commit b3f8954229
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 377 additions and 191 deletions

View File

@ -4,6 +4,7 @@
#include <map>
#include <utility>
#include <vector>
#include <memory>
#include "option.h"
#include "posting_list.h"
@ -144,6 +145,20 @@ struct filter_result_t {
static void copy_references(const filter_result_t& from, filter_result_t& to);
};
/// The timeout check only reads the clock when its call counter is a multiple of this value.
/// Builds that define TEST_BUILD use a much smaller value than regular builds.
constexpr uint16_t function_call_modulo =
#ifdef TEST_BUILD
        10;
#else
        16384;
#endif
/// State needed to decide whether a filter iterator has exceeded its search time budget.
struct filter_result_iterator_timeout_info {
    /// Stores the search start timestamp and the allowed duration, both in microseconds.
    filter_result_iterator_timeout_info(uint64_t search_begin_us, uint64_t search_stop_us);

    /// Number of timeout checks performed so far; the clock is consulted only on every
    /// `function_call_modulo`-th check.
    uint16_t function_call_counter{0};

    /// Timestamp (microseconds since the system clock epoch) at which the search began.
    uint64_t search_begin_us{0};

    /// Elapsed microseconds since `search_begin_us` after which the search counts as timed out.
    /// UINT64_MAX is the value callers pass when no timeout is requested.
    uint64_t search_stop_us{UINT64_MAX};
};
class filter_result_iterator_t {
private:
std::string collection_name;
@ -170,6 +185,8 @@ private:
bool delete_filter_node = false;
std::unique_ptr<filter_result_iterator_timeout_info> timeout_info;
/// Initializes the state of the iterator node after its creation.
void init();
@ -192,15 +209,18 @@ private:
/// Collects n doc ids while advancing the iterator. The iterator may become invalid during this operation.
/// **The references are moved from filter_result_iterator_t.
void get_n_ids(const uint32_t& n, filter_result_t*& result);
void get_n_ids(const uint32_t& n, filter_result_t*& result, const bool& override_timeout = false);
/// Updates `validity` of the iterator to `timed_out` if condition is met. Assumes `timeout_info` is not null.
inline bool is_timed_out();
public:
uint32_t seq_id = 0;
/// Collection name -> references
std::map<std::string, reference_filter_result_t> reference;
/// Set to false when this iterator or it's subtree becomes invalid.
bool is_valid = true;
/// In case of a complex filter query, validity of a node is dependent on its sub-nodes.
enum {timed_out = -1, invalid, valid} validity = valid;
/// Initialization status of the iterator.
Option<bool> status = Option(true);
@ -212,10 +232,12 @@ public:
filter_result_iterator_t() = default;
explicit filter_result_iterator_t(uint32_t* ids, const uint32_t& ids_count);
explicit filter_result_iterator_t(uint32_t* ids, const uint32_t& ids_count,
uint64_t search_begin_us = 0, uint64_t search_stop_us = UINT64_MAX);
explicit filter_result_iterator_t(const std::string collection_name,
Index const* const index, filter_node_t const* const filter_node);
explicit filter_result_iterator_t(const std::string& collection_name,
Index const* const index, filter_node_t const* const filter_node,
uint64_t search_begin_us = 0, uint64_t search_stop_us = UINT64_MAX);
~filter_result_iterator_t();
@ -230,10 +252,10 @@ public:
/// Returns a tri-state:
/// 0: id is not valid
/// 1: id is valid
/// -1: end of iterator
/// -1: end of iterator / timed out
///
/// Handles moving the individual iterators internally.
[[nodiscard]] int valid(uint32_t id);
[[nodiscard]] int is_valid(uint32_t id);
/// Advances the iterator to get the next value of doc and reference. The iterator may become invalid during this
/// operation.
@ -244,17 +266,17 @@ public:
void get_n_ids(const uint32_t& n,
uint32_t& excluded_result_index,
uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size,
filter_result_t*& result);
filter_result_t*& result, const bool& override_timeout = false);
/// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during
/// this operation.
void skip_to(uint32_t id);
void skip_to(uint32_t id, const bool& override_timeout = false);
/// Returns true if at least one id from the posting list object matches the filter.
bool contains_atleast_one(const void* obj);
/// Returns to the initial state of the iterator.
void reset();
void reset(const bool& override_timeout = false);
/// Iterates and collects all the filter ids into filter_array.
/// \return size of the filter array

View File

@ -300,7 +300,7 @@ public:
}
filter_result_iterator->reset();
return filter_result_iterator->valid(id) == 1;
return filter_result_iterator->is_valid(id) == 1;
}
};

View File

@ -1017,7 +1017,8 @@ bool validate_and_add_leaf(art_leaf* leaf,
}
if(prev_token.empty() || !prev_leaf) {
if (filter_result_iterator->is_valid && !filter_result_iterator->contains_atleast_one(leaf->values)) {
if (filter_result_iterator->validity == filter_result_iterator_t::valid &&
!filter_result_iterator->contains_atleast_one(leaf->values)) {
return false;
}
} else {
@ -1177,8 +1178,9 @@ int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_r
exclude_leaves, results);
filter_result_iterator->reset();
if (++num_processed % 1024 == 0 && (microseconds(
std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
if (filter_result_iterator->validity == filter_result_iterator_t::timed_out ||
(++num_processed % 1024 == 0 && (microseconds(
std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us)) {
search_cutoff = true;
break;
}

View File

@ -1,3 +1,4 @@
#include <memory>
#include <queue>
#include <id_list.h>
#include <s2/s2point.h>
@ -192,19 +193,19 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
}
void filter_result_iterator_t::and_filter_iterators() {
while (left_it->is_valid && right_it->is_valid) {
while (left_it->validity && right_it->validity) {
while (left_it->seq_id < right_it->seq_id) {
left_it->skip_to(right_it->seq_id);
if (!left_it->is_valid) {
is_valid = false;
if (!left_it->validity) {
validity = invalid;
return;
}
}
while (left_it->seq_id > right_it->seq_id) {
right_it->skip_to(left_it->seq_id);
if (!right_it->is_valid) {
is_valid = false;
if (!right_it->validity) {
validity = invalid;
return;
}
}
@ -224,11 +225,11 @@ void filter_result_iterator_t::and_filter_iterators() {
}
}
is_valid = false;
validity = invalid;
}
void filter_result_iterator_t::or_filter_iterators() {
if (left_it->is_valid && right_it->is_valid) {
if (left_it->validity && right_it->validity) {
if (left_it->seq_id < right_it->seq_id) {
seq_id = left_it->seq_id;
reference.clear();
@ -264,7 +265,7 @@ void filter_result_iterator_t::or_filter_iterators() {
return;
}
if (left_it->is_valid) {
if (left_it->validity) {
seq_id = left_it->seq_id;
reference.clear();
@ -275,7 +276,7 @@ void filter_result_iterator_t::or_filter_iterators() {
return;
}
if (right_it->is_valid) {
if (right_it->validity) {
seq_id = right_it->seq_id;
reference.clear();
@ -286,7 +287,7 @@ void filter_result_iterator_t::or_filter_iterators() {
return;
}
is_valid = false;
validity = invalid;
}
void filter_result_iterator_t::advance_string_filter_token_iterators() {
@ -407,18 +408,22 @@ void filter_result_iterator_t::get_string_filter_next_match(const bool& field_is
seq_id = lowest_id;
}
is_valid = one_is_valid;
validity = one_is_valid ? valid : invalid;
}
void filter_result_iterator_t::next() {
if (!is_valid) {
if (validity != valid) {
return;
}
if (timeout_info != nullptr && is_timed_out()) {
return;
}
// No need to traverse iterator tree if there's only one filter or compute_result() has been called.
if (is_filter_result_initialized) {
if (++result_index >= filter_result.count) {
is_valid = false;
validity = invalid;
return;
}
@ -457,7 +462,7 @@ void filter_result_iterator_t::next() {
const filter a_filter = filter_node->filter_exp;
if (!index->field_is_indexed(a_filter.field_name)) {
is_valid = false;
validity = invalid;
return;
}
@ -474,15 +479,15 @@ void filter_result_iterator_t::next() {
previous_match = seq_id;
advance_string_filter_token_iterators();
get_string_filter_next_match(f.is_array());
} while (is_valid && previous_match + 1 == seq_id);
} while (validity && previous_match + 1 == seq_id);
if (!is_valid) {
if (!validity) {
// We've reached the end of the index, no possible matches pending.
if (previous_match >= index->seq_ids->last_id()) {
return;
}
is_valid = true;
validity = valid;
result_index = index->seq_ids->last_id() + 1;
seq_id = previous_match + 1;
return;
@ -540,8 +545,8 @@ void filter_result_iterator_t::get_string_filter_first_match(const bool& field_i
if (filter_node->filter_exp.apply_not_equals) {
// filter didn't match any id. So by applying not equals, every id in the index is a match.
if (!is_valid) {
is_valid = true;
if (!validity) {
validity = valid;
seq_id = 0;
result_index = index->seq_ids->last_id() + 1;
return;
@ -560,15 +565,15 @@ void filter_result_iterator_t::get_string_filter_first_match(const bool& field_i
previous_match = seq_id;
advance_string_filter_token_iterators();
get_string_filter_next_match(field_is_array);
} while (is_valid && previous_match + 1 == seq_id);
} while (validity && previous_match + 1 == seq_id);
if (!is_valid) {
if (!validity) {
// filter matched all the ids in the index. So for not equals, there's no match.
if (previous_match >= index->seq_ids->last_id()) {
return;
}
is_valid = true;
validity = valid;
result_index = index->seq_ids->last_id() + 1;
seq_id = previous_match + 1;
return;
@ -611,7 +616,7 @@ void filter_result_iterator_t::init() {
auto ref_collection = cm.get_collection(ref_collection_name);
if (ref_collection == nullptr) {
status = Option<bool>(400, "Referenced collection `" + ref_collection_name + "` not found.");
is_valid = false;
validity = invalid;
return;
}
@ -620,7 +625,7 @@ void filter_result_iterator_t::init() {
has_reference = ref_collection->is_referenced_in(collection_name);
if (!is_referenced && !has_reference) {
status = Option<bool>(400, "Failed to join on `" + ref_collection_name + "`: No reference field found.");
is_valid = false;
validity = invalid;
return;
}
@ -632,7 +637,7 @@ void filter_result_iterator_t::init() {
if (!reference_filter_op.ok()) {
status = Option<bool>(400, "Failed to join on `" + a_filter.referenced_collection_name
+ "` collection: " + reference_filter_op.error());
is_valid = false;
validity = invalid;
return;
}
} else if (has_reference) {
@ -643,14 +648,14 @@ void filter_result_iterator_t::init() {
if (!reference_filter_op.ok()) {
status = Option<bool>(400, "Failed to join on `" + a_filter.referenced_collection_name
+ "` collection: " + reference_filter_op.error());
is_valid = false;
validity = invalid;
return;
}
auto get_reference_field_op = ref_collection->get_referenced_in_field_with_lock(collection_name);
if (!get_reference_field_op.ok()) {
status = Option<bool>(get_reference_field_op.code(), get_reference_field_op.error());
is_valid = false;
validity = invalid;
return;
}
@ -669,7 +674,7 @@ void filter_result_iterator_t::init() {
auto filter_init_op = fit.init_status();
if (!filter_init_op.ok()) {
status = Option<bool>(filter_init_op.code(), filter_init_op.error());
is_valid = false;
validity = invalid;
return;
}
@ -677,7 +682,7 @@ void filter_result_iterator_t::init() {
}
if (filter_result.count == 0) {
is_valid = false;
validity = invalid;
return;
}
@ -711,7 +716,7 @@ void filter_result_iterator_t::init() {
}
if (filter_result.count == 0) {
is_valid = false;
validity = invalid;
return;
}
@ -723,7 +728,7 @@ void filter_result_iterator_t::init() {
if (!index->field_is_indexed(a_filter.field_name)) {
status = Option<bool>(400, "Cannot filter on non-indexed field `" + a_filter.field_name + "`.");
is_valid = false;
validity = invalid;
return;
}
@ -794,7 +799,7 @@ void filter_result_iterator_t::init() {
}
if (filter_result.count == 0) {
is_valid = false;
validity = invalid;
return;
}
@ -869,7 +874,7 @@ void filter_result_iterator_t::init() {
}
if (filter_result.count == 0) {
is_valid = false;
validity = invalid;
return;
}
@ -930,7 +935,7 @@ void filter_result_iterator_t::init() {
}
if (filter_result.count == 0) {
is_valid = false;
validity = invalid;
return;
}
@ -979,7 +984,7 @@ void filter_result_iterator_t::init() {
status = Option<bool>(400, "Polygon" + (a_filter.values.size() > 1 ?
" at position " + std::to_string(fi + 1) : "")
+ " is invalid: " + error.text());
is_valid = false;
validity = invalid;
return;
} else {
query_region = loop;
@ -1081,7 +1086,7 @@ void filter_result_iterator_t::init() {
}
if (filter_result.count == 0) {
is_valid = false;
validity = invalid;
return;
}
@ -1142,8 +1147,8 @@ void filter_result_iterator_t::init() {
}
}
void filter_result_iterator_t::skip_to(uint32_t id) {
if (!is_valid) {
void filter_result_iterator_t::skip_to(uint32_t id, const bool& override_timeout) {
if (validity == invalid || (!override_timeout && timeout_info != nullptr && is_timed_out())) {
return;
}
@ -1152,7 +1157,7 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
ArrayUtils::skip_index_to_id(result_index, filter_result.docs, filter_result.count, id);
if (result_index >= filter_result.count) {
is_valid = false;
validity = invalid;
return;
}
@ -1183,7 +1188,7 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
const filter a_filter = filter_node->filter_exp;
if (!index->field_is_indexed(a_filter.field_name)) {
is_valid = false;
validity = invalid;
return;
}
@ -1209,16 +1214,16 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
previous_match = seq_id;
advance_string_filter_token_iterators();
get_string_filter_next_match(f.is_array());
} while (is_valid && previous_match + 1 == seq_id);
} while (is_valid && seq_id <= id);
} while (validity && previous_match + 1 == seq_id);
} while (validity && seq_id <= id);
if (!is_valid) {
if (!validity) {
// filter matched all the ids in the index. So for not equals, there's no match.
if (previous_match >= index->seq_ids->last_id()) {
return;
}
is_valid = true;
validity = valid;
seq_id = previous_match + 1;
result_index = index->seq_ids->last_id() + 1;
@ -1257,33 +1262,38 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
}
}
int filter_result_iterator_t::valid(uint32_t id) {
if (!is_valid) {
int filter_result_iterator_t::is_valid(uint32_t id) {
if (validity != valid) {
return -1;
}
// No need to traverse iterator tree if there's only one filter or compute_result() has been called.
if (is_filter_result_initialized) {
skip_to(id);
return is_valid ? (seq_id == id ? 1 : 0) : -1;
return validity ? (seq_id == id ? 1 : 0) : -1;
}
if (timeout_info != nullptr && is_timed_out()) {
return -1;
}
if (filter_node->isOperator) {
auto left_valid = left_it->valid(id), right_valid = right_it->valid(id);
// We need to consider only the valid/invalid states here since child nodes can never time out.
auto left_validity = left_it->is_valid(id), right_validity = right_it->is_valid(id);
if (filter_node->filter_operator == AND) {
is_valid = left_it->is_valid && right_it->is_valid;
validity = (left_it->validity == valid && right_it->validity == valid) ? valid : invalid;
if (left_valid < 1 || right_valid < 1) {
if (left_valid == -1 || right_valid == -1) {
if (left_validity < 1 || right_validity < 1) {
if (left_validity == -1 || right_validity == -1) {
return -1;
}
// id did not match the filter but both of the sub-iterators are still valid.
// Updating seq_id to the next potential match.
if (left_valid == 0 && right_valid == 0) {
if (left_validity == 0 && right_validity == 0) {
seq_id = std::max(left_it->seq_id, right_it->seq_id);
} else if (left_valid == 0) {
} else if (left_validity == 0) {
seq_id = left_it->seq_id;
} else {
seq_id = right_it->seq_id;
@ -1295,18 +1305,18 @@ int filter_result_iterator_t::valid(uint32_t id) {
seq_id = id;
return 1;
} else {
is_valid = left_it->is_valid || right_it->is_valid;
validity = (left_it->validity == valid || right_it->validity == valid) ? valid : invalid;
if (left_valid < 1 && right_valid < 1) {
if (left_valid == -1 && right_valid == -1) {
if (left_validity < 1 && right_validity < 1) {
if (left_validity == -1 && right_validity == -1) {
return -1;
}
// id did not match the filter; both of the sub-iterators or one of them might be valid.
// Updating seq_id to the next match.
if (left_valid == 0 && right_valid == 0) {
if (left_validity == 0 && right_validity == 0) {
seq_id = std::min(left_it->seq_id, right_it->seq_id);
} else if (left_valid == 0) {
} else if (left_validity == 0) {
seq_id = left_it->seq_id;
} else {
seq_id = right_it->seq_id;
@ -1321,7 +1331,7 @@ int filter_result_iterator_t::valid(uint32_t id) {
}
skip_to(id);
return is_valid ? (seq_id == id ? 1 : 0) : -1;
return validity ? (seq_id == id ? 1 : 0) : -1;
}
Option<bool> filter_result_iterator_t::init_status() {
@ -1339,7 +1349,7 @@ bool filter_result_iterator_t::contains_atleast_one(const void *obj) {
compact_posting_list_t* list = COMPACT_POSTING_PTR(obj);
size_t i = 0;
while(i < list->length && is_valid) {
while(i < list->length && validity == valid) {
size_t num_existing_offsets = list->id_offsets[i];
size_t existing_id = list->id_offsets[i + num_existing_offsets + 1];
@ -1358,7 +1368,7 @@ bool filter_result_iterator_t::contains_atleast_one(const void *obj) {
auto list = (posting_list_t*)(obj);
posting_list_t::iterator_t it = list->new_iterator();
while(it.valid() && is_valid) {
while(it.valid() && validity == valid) {
uint32_t id = it.id();
if(id == seq_id) {
@ -1376,15 +1386,19 @@ bool filter_result_iterator_t::contains_atleast_one(const void *obj) {
return false;
}
void filter_result_iterator_t::reset() {
void filter_result_iterator_t::reset(const bool& override_timeout) {
if (filter_node == nullptr) {
return;
}
if (!override_timeout && timeout_info != nullptr && is_timed_out()) {
return;
}
// No need to traverse iterator tree if there's only one filter or compute_result() has been called.
if (is_filter_result_initialized) {
if (filter_result.count == 0) {
is_valid = false;
validity = invalid;
return;
}
@ -1397,7 +1411,7 @@ void filter_result_iterator_t::reset() {
reference.insert(ref.begin(), ref.end());
}
is_valid = true;
validity = valid;
return;
}
@ -1405,7 +1419,7 @@ void filter_result_iterator_t::reset() {
// Reset the subtrees then apply operators to arrive at the first valid doc.
left_it->reset();
right_it->reset();
is_valid = true;
validity = valid;
if (filter_node->filter_operator == AND) {
and_filter_iterators();
@ -1440,7 +1454,7 @@ void filter_result_iterator_t::reset() {
}
uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) {
if (!is_valid) {
if (validity != valid) {
return 0;
}
@ -1454,7 +1468,7 @@ uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) {
do {
filter_ids.push_back(seq_id);
next();
} while (is_valid);
} while (validity == valid);
filter_array = new uint32_t[filter_ids.size()];
std::copy(filter_ids.begin(), filter_ids.end(), filter_array);
@ -1463,7 +1477,7 @@ uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) {
}
uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& lenA, uint32_t*& results) {
if (!is_valid) {
if (validity != valid) {
return 0;
}
@ -1473,7 +1487,7 @@ uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t&
std::vector<uint32_t> filter_ids;
for (uint32_t i = 0; i < lenA; i++) {
auto result = valid(A[i]);
auto result = is_valid(A[i]);
if (result == -1) {
break;
@ -1495,7 +1509,7 @@ uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t&
}
void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& lenA, filter_result_t& result) {
if (!is_valid) {
if (validity != valid) {
return;
}
@ -1507,7 +1521,7 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
std::vector<uint32_t> filter_ids;
for (uint32_t i = 0; i < lenA; i++) {
auto _result = valid(A[i]);
auto _result = is_valid(A[i]);
if (_result == -1) {
break;
@ -1534,7 +1548,7 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
std::vector<uint32_t> match_indexes;
for (uint32_t i = 0; i < lenA; i++) {
auto _result = valid(A[i]);
auto _result = is_valid(A[i]);
if (_result == -1) {
break;
@ -1559,16 +1573,22 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
}
}
filter_result_iterator_t::filter_result_iterator_t(const std::string collection_name, const Index *const index,
const filter_node_t *const filter_node) :
filter_result_iterator_t::filter_result_iterator_t(const std::string& collection_name, const Index *const index,
const filter_node_t *const filter_node,
uint64_t search_begin, uint64_t search_stop) :
collection_name(collection_name),
index(index),
filter_node(filter_node) {
if (filter_node == nullptr) {
is_valid = false;
validity = invalid;
return;
}
// Only initialize timeout_info in the root node. We won't pass search_begin/search_stop parameters to the sub-nodes.
if (search_stop != UINT64_MAX) {
timeout_info = std::make_unique<filter_result_iterator_timeout_info>(search_begin, search_stop);
}
// Generate the iterator tree and then initialize each node.
if (filter_node->isOperator) {
left_it = new filter_result_iterator_t(collection_name, index, filter_node->left);
@ -1577,7 +1597,7 @@ filter_result_iterator_t::filter_result_iterator_t(const std::string collection_
init();
if (!is_valid) {
if (!validity) {
this->approx_filter_ids_length = 0;
}
}
@ -1625,7 +1645,7 @@ filter_result_iterator_t& filter_result_iterator_t::operator=(filter_result_iter
posting_list_iterators = std::move(obj.posting_list_iterators);
expanded_plists = std::move(obj.expanded_plists);
is_valid = obj.is_valid;
validity = obj.validity;
seq_id = obj.seq_id;
reference = std::move(obj.reference);
@ -1637,11 +1657,25 @@ filter_result_iterator_t& filter_result_iterator_t::operator=(filter_result_iter
return *this;
}
void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t*& result) {
void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t*& result, const bool& override_timeout) {
if (!is_filter_result_initialized) {
return;
}
if (override_timeout) {
result_index = 0;
} else if (timeout_info != nullptr) {
// In Index::search_wildcard number of calls to get_n_ids will be min(number of threads, filter match ids).
// Therefore, `timeout_info->function_call_counter` won't reach `function_call_modulo` if only incremented on
// function call.
if (n > function_call_modulo) {
timeout_info->function_call_counter = function_call_modulo - 1;
}
if (is_timed_out()) {
return;
}
}
auto result_length = result->count = std::min(n, filter_result.count - result_index);
result->docs = new uint32_t[result_length];
if (filter_result.coll_to_references != nullptr) {
@ -1661,16 +1695,18 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t*& re
result_reference = std::move(filter_result.coll_to_references[result_index]);
}
is_valid = result_index < filter_result.count;
if (!override_timeout) {
validity = result_index < filter_result.count ? valid : invalid;
}
}
void filter_result_iterator_t::get_n_ids(const uint32_t& n,
uint32_t& excluded_result_index,
uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size,
filter_result_t*& result) {
filter_result_t*& result, const bool& override_timeout) {
if (excluded_result_ids == nullptr || excluded_result_ids_size == 0 ||
excluded_result_index >= excluded_result_ids_size) {
return get_n_ids(n, result);
return get_n_ids(n, result, override_timeout);
}
// This method is only called in Index::search_wildcard after filter_result_iterator_t::compute_result.
@ -1678,6 +1714,20 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n,
return;
}
if (override_timeout) {
result_index = 0;
} else if (timeout_info != nullptr) {
// In Index::search_wildcard number of calls to get_n_ids will be min(number of threads, filter match ids).
// Therefore, `timeout_info->function_call_counter` won't reach `function_call_modulo` if only incremented on
// function call.
if (n > function_call_modulo) {
timeout_info->function_call_counter = function_call_modulo - 1;
}
if (is_timed_out()) {
return;
}
}
std::vector<uint32_t> match_indexes;
for (uint32_t count = 0; count < n && result_index < filter_result.count; result_index++) {
auto id = filter_result.docs[result_index];
@ -1708,7 +1758,9 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n,
result_reference = std::move(filter_result.coll_to_references[match_index]);
}
is_valid = result_index < filter_result.count;
if (!override_timeout) {
validity = result_index < filter_result.count ? valid : invalid;
}
}
filter_result_iterator_t::filter_result_iterator_t(uint32_t approx_filter_ids_length) :
@ -1717,29 +1769,34 @@ filter_result_iterator_t::filter_result_iterator_t(uint32_t approx_filter_ids_le
delete_filter_node = true;
}
filter_result_iterator_t::filter_result_iterator_t(uint32_t* ids, const uint32_t& ids_count) {
filter_result_iterator_t::filter_result_iterator_t(uint32_t* ids, const uint32_t& ids_count,
uint64_t search_begin, uint64_t search_stop) {
filter_result.count = approx_filter_ids_length = ids_count;
filter_result.docs = ids;
is_valid = ids_count > 0;
validity = ids_count > 0 ? valid : invalid;
if (is_valid) {
if (validity) {
seq_id = filter_result.docs[result_index];
is_filter_result_initialized = true;
filter_node = new filter_node_t({"dummy", {}, {}});
delete_filter_node = true;
if (search_stop != UINT64_MAX) {
timeout_info = std::make_unique<filter_result_iterator_timeout_info>(search_begin, search_stop);
}
}
}
void filter_result_iterator_t::add_phrase_ids(filter_result_iterator_t*& filter_result_iterator,
void filter_result_iterator_t::add_phrase_ids(filter_result_iterator_t*& fit,
uint32_t* phrase_result_ids, const uint32_t& phrase_result_count) {
auto root_iterator = new filter_result_iterator_t(std::min(phrase_result_count, filter_result_iterator->approx_filter_ids_length));
auto root_iterator = new filter_result_iterator_t(std::min(phrase_result_count, fit->approx_filter_ids_length));
root_iterator->left_it = new filter_result_iterator_t(phrase_result_ids, phrase_result_count);
root_iterator->right_it = filter_result_iterator;
root_iterator->right_it = fit;
auto& left_it = root_iterator->left_it;
auto& right_it = root_iterator->right_it;
while (left_it->is_valid && right_it->is_valid && left_it->seq_id != right_it->seq_id) {
while (left_it->validity && right_it->validity && left_it->seq_id != right_it->seq_id) {
if (left_it->seq_id < right_it->seq_id) {
left_it->skip_to(right_it->seq_id);
} else {
@ -1747,19 +1804,25 @@ void filter_result_iterator_t::add_phrase_ids(filter_result_iterator_t*& filter_
}
}
root_iterator->is_valid = left_it->is_valid && right_it->is_valid;
root_iterator->timeout_info = std::move(fit->timeout_info);
root_iterator->validity = (left_it->validity == timed_out || right_it->validity == timed_out) ? timed_out :
(left_it->validity == invalid || right_it->validity == invalid) ? invalid : valid;
root_iterator->seq_id = left_it->seq_id;
filter_result_iterator = root_iterator;
fit = root_iterator;
}
void filter_result_iterator_t::compute_result() {
if (filter_node == nullptr) {
is_valid = false;
validity = invalid;
is_filter_result_initialized = false;
LOG(ERROR) << "filter_node is null";
return;
}
if (timeout_info != nullptr && is_timed_out()) {
return;
}
if (filter_node->isOperator) {
left_it->compute_result();
right_it->compute_result();
@ -1773,7 +1836,7 @@ void filter_result_iterator_t::compute_result() {
// In a complex filter query a sub-expression might not match any document while the full expression does match
// at least one document. If the full expression doesn't match any document, we return early in the search.
if (filter_result.count == 0) {
is_valid = false;
validity = invalid;
is_filter_result_initialized = true;
return;
}
@ -1882,7 +1945,7 @@ void filter_result_iterator_t::compute_result() {
}
if (filter_result.count == 0) {
is_valid = false;
validity = invalid;
return;
}
@ -1891,3 +1954,18 @@ void filter_result_iterator_t::compute_result() {
is_filter_result_initialized = true;
approx_filter_ids_length = filter_result.count;
}
/// Reports whether this iterator has run past its search time budget, marking it `timed_out` if so.
/// Requires `timeout_info` to be non-null.
///
/// \return true when the iterator is (or has just become) timed out, false otherwise.
bool filter_result_iterator_t::is_timed_out() {
    // Once timed out, stay timed out; the call counter is left untouched in this case.
    if (validity == timed_out) {
        return true;
    }

    // The clock is read only on every `function_call_modulo`-th check.
    if (++(timeout_info->function_call_counter) % function_call_modulo != 0) {
        return false;
    }

    const auto now_us = std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::system_clock::now().time_since_epoch()).count();
    const bool within_budget = (now_us - timeout_info->search_begin_us) <= timeout_info->search_stop_us;
    if (within_budget) {
        return false;
    }

    validity = timed_out;
    return true;
}
filter_result_iterator_timeout_info::filter_result_iterator_timeout_info(uint64_t search_begin,
uint64_t search_stop) :
search_begin_us(search_begin),
search_stop_us(search_stop) {}

View File

@ -1641,6 +1641,7 @@ Option<bool> Index::search_all_candidates(const size_t num_search_fields,
query_hashes.insert(qhash);
filter_result_iterator->reset();
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
}
return Option<bool>(true);
@ -1790,7 +1791,8 @@ Option<bool> Index::do_filtering_with_lock(filter_node_t* const filter_tree_root
const std::string& collection_name) const {
std::shared_lock lock(mutex);
auto filter_result_iterator = filter_result_iterator_t(collection_name, this, filter_tree_root);
auto filter_result_iterator = filter_result_iterator_t(collection_name, this, filter_tree_root,
search_begin_us, search_stop_us);
auto filter_init_op = filter_result_iterator.init_status();
if (!filter_init_op.ok()) {
return filter_init_op;
@ -1807,7 +1809,8 @@ Option<bool> Index::do_reference_filtering_with_lock(filter_node_t* const filter
const std::string& reference_helper_field_name) const {
std::shared_lock lock(mutex);
auto filter_result_iterator = filter_result_iterator_t(collection_name, this, filter_tree_root);
auto filter_result_iterator = filter_result_iterator_t(collection_name, this, filter_tree_root,
search_begin_us, search_stop_us);
auto filter_init_op = filter_result_iterator.init_status();
if (!filter_init_op.ok()) {
return filter_init_op;
@ -2406,7 +2409,8 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
) const {
std::shared_lock lock(mutex);
auto filter_result_iterator = new filter_result_iterator_t(collection_name, this, filter_tree_root);
auto filter_result_iterator = new filter_result_iterator_t(collection_name, this, filter_tree_root,
search_begin_us, search_stop_us);
std::unique_ptr<filter_result_iterator_t> filter_iterator_guard(filter_result_iterator);
auto filter_init_op = filter_result_iterator->init_status();
@ -2414,7 +2418,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
return filter_init_op;
}
if (filter_tree_root != nullptr && !filter_result_iterator->is_valid) {
if (filter_tree_root != nullptr && filter_result_iterator->validity != filter_result_iterator_t::valid) {
return Option(true);
}
@ -2430,6 +2434,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
filter_result_iterator, curated_ids, included_ids_map,
included_ids_vec, excluded_group_ids);
filter_result_iterator->reset();
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
std::vector<uint32_t> curated_ids_sorted(curated_ids.begin(), curated_ids.end());
std::sort(curated_ids_sorted.begin(), curated_ids_sorted.end());
@ -2462,7 +2467,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
// phrase queries are handled as a filtering query
bool is_wildcard_non_phrase_query = is_wildcard_query && field_query_tokens[0].q_phrases.empty();
bool no_filters_provided = (filter_tree_root == nullptr && !filter_result_iterator->is_valid);
bool no_filters_provided = (filter_tree_root == nullptr && !filter_result_iterator->validity == filter_result_iterator_t::valid);
// handle phrase searches
if (!field_query_tokens[0].q_phrases.empty()) {
@ -2555,7 +2560,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
uint32_t filter_id_count = 0;
while (!no_filters_provided &&
filter_id_count < vector_query.flat_search_cutoff && filter_result_iterator->is_valid) {
filter_id_count < vector_query.flat_search_cutoff && filter_result_iterator->validity == filter_result_iterator_t::valid) {
auto& seq_id = filter_result_iterator->seq_id;
auto filter_result = single_filter_result_t(seq_id, std::move(filter_result_iterator->reference));
filter_result_iterator->next();
@ -2583,9 +2588,10 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
filter_id_count++;
}
filter_result_iterator->reset();
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
if(no_filters_provided ||
(filter_id_count >= vector_query.flat_search_cutoff && filter_result_iterator->is_valid)) {
(filter_id_count >= vector_query.flat_search_cutoff && filter_result_iterator->validity == filter_result_iterator_t::valid)) {
dist_results.clear();
VectorFilterFunctor filterFunctor(filter_result_iterator);
@ -2605,15 +2611,20 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
filter_result_iterator->reset();
if (filter_result_iterator->is_valid && !filter_result_iterator->reference.empty()) {
if (!filter_result_iterator->reference.empty()) {
// We'll have to get the references of each document.
for (auto pair: pairs) {
if (filter_result_iterator->validity == filter_result_iterator_t::timed_out) {
// Overriding timeout since we need to get the references of matched docs.
filter_result_iterator->reset(true);
search_cutoff = true;
}
// The doc_id must be valid otherwise it would've been filtered out upstream.
filter_result_iterator->skip_to(pair.second);
filter_result_iterator->skip_to(pair.second, search_cutoff);
auto filter_result = single_filter_result_t(pair.second,
std::move(filter_result_iterator->reference));
dist_results.emplace_back(pair.first, filter_result);
filter_result_iterator->reset();
}
} else {
for (const auto &pair: pairs) {
@ -2688,7 +2699,8 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
} else {
// if filters were not provided, use the seq_ids index to generate the list of all document ids
if (no_filters_provided) {
filter_result_iterator = new filter_result_iterator_t(seq_ids->uncompress(), seq_ids->num_ids());
filter_result_iterator = new filter_result_iterator_t(seq_ids->uncompress(), seq_ids->num_ids(),
search_begin_us, search_stop_us);
filter_iterator_guard.reset(filter_result_iterator);
}
@ -2703,8 +2715,6 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
if (!search_wildcard_op.ok()) {
return search_wildcard_op;
}
filter_result_iterator->reset();
}
uint32_t _all_result_ids_len = all_result_ids_len;
@ -2831,6 +2841,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
}
filter_result_iterator->reset();
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
// gather up both original query and synonym queries and do drop tokens
@ -2937,6 +2948,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
}
filter_result_iterator->reset();
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
if(!vector_query.field_name.empty()) {
// check at least one of sort fields is text match
@ -2969,6 +2981,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
dist_labels = field_vector_index->vecdex->searchKnnCloserFirst(vector_query.values.data(), k, &filterFunctor);
}
filter_result_iterator->reset();
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
std::vector<std::pair<uint32_t,float>> vec_results;
for (const auto& dist_label : dist_labels) {
@ -3023,14 +3036,15 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
group_by_field_it_vec = get_group_by_field_iterators(group_by_fields);
}
for(size_t res_index = 0; res_index < vec_results.size(); res_index++) {
for(size_t res_index = 0; res_index < vec_results.size() &&
filter_result_iterator->validity != filter_result_iterator_t::timed_out; res_index++) {
auto& vec_result = vec_results[res_index];
auto seq_id = vec_result.first;
filter_result_iterator->skip_to(seq_id);
auto references = std::move(filter_result_iterator->reference);
filter_result_iterator->reset();
KV* found_kv = nullptr;
if(group_limit != 0) {
for(auto& kv : kvs) {
@ -3106,14 +3120,6 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
kv.text_match_score = 0;
kv.vector_distance = vec_result.second;
if (filter_result_iterator->is_valid &&
!filter_result_iterator->reference.empty()) {
// The doc_id must be valid otherwise it would've been filtered out upstream.
filter_result_iterator->skip_to(seq_id);
kv.reference_filter_results = std::move(filter_result_iterator->reference);
filter_result_iterator->reset();
}
auto ret = topster->add(&kv);
vec_search_ids.push_back(seq_id);
@ -3122,6 +3128,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
}
}
}
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
if(!vec_search_ids.empty()) {
uint32_t* new_all_result_ids = nullptr;
@ -3371,9 +3378,9 @@ void Index::process_curated_ids(const std::vector<std::pair<uint32_t, uint32_t>>
// if `filter_curated_hits` is enabled, we will remove curated hits that don't match filter condition
std::set<uint32_t> included_ids_set;
if(filter_result_iterator->is_valid && filter_curated_hits) {
if(filter_result_iterator->validity == filter_result_iterator_t::valid && filter_curated_hits) {
for (const auto &included_id: included_ids_vec) {
auto result = filter_result_iterator->valid(included_id);
auto result = filter_result_iterator->is_valid(included_id);
if (result == -1) {
break;
@ -3589,6 +3596,10 @@ Option<bool> Index::fuzzy_search_fields(const std::vector<search_field_t>& the_f
costs[token_index], costs[token_index], max_candidates, token_order, prefix_search,
last_token, prev_token, filter_result_iterator, field_leaves, unique_tokens);
filter_result_iterator->reset();
if (filter_result_iterator->validity == filter_result_iterator_t::timed_out) {
search_cutoff = true;
return Option<bool>(true);
}
/*auto timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - begin).count();
@ -3620,6 +3631,7 @@ Option<bool> Index::fuzzy_search_fields(const std::vector<search_field_t>& the_f
the_fields, num_search_fields, filter_result_iterator, exclude_token_ids,
exclude_token_ids_size, prev_token_doc_ids, popular_field_ids);
filter_result_iterator->reset();
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
for(size_t field_id: query_field_ids) {
auto& the_field = the_fields[field_id];
@ -3643,6 +3655,10 @@ Option<bool> Index::fuzzy_search_fields(const std::vector<search_field_t>& the_f
costs[token_index], costs[token_index], max_candidates, token_order, prefix_search,
false, "", filter_result_iterator, field_leaves, unique_tokens);
filter_result_iterator->reset();
if (filter_result_iterator->validity == filter_result_iterator_t::timed_out) {
search_cutoff = true;
return Option<bool>(true);
}
if(field_leaves.empty()) {
// look at the next field
@ -4894,7 +4910,7 @@ Option<bool> Index::do_phrase_search(const size_t num_search_fields, const std::
collate_included_ids({}, included_ids_map, curated_topster, searched_queries);
// AND phrase id matches with filter ids
if(filter_result_iterator->is_valid) {
if(filter_result_iterator->validity) {
filter_result_iterator_t::add_phrase_ids(filter_result_iterator, phrase_result_ids, phrase_result_count);
} else {
delete filter_result_iterator;
@ -4917,7 +4933,8 @@ Option<bool> Index::do_phrase_search(const size_t num_search_fields, const std::
group_by_field_it_vec = get_group_by_field_iterators(group_by_fields);
}
// populate topster
for(size_t i = 0; i < std::min<size_t>(10000, all_result_ids_len); i++) {
for(size_t i = 0; i < std::min<size_t>(10000, all_result_ids_len) &&
filter_result_iterator->validity == filter_result_iterator_t::valid; i++) {
auto seq_id = filter_result_iterator->seq_id;
auto references = std::move(filter_result_iterator->reference);
filter_result_iterator->next();
@ -4957,6 +4974,7 @@ Option<bool> Index::do_phrase_search(const size_t num_search_fields, const std::
}
}
filter_result_iterator->reset();
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
searched_queries.push_back({});
return Option<bool>(true);
@ -5068,7 +5086,7 @@ Option<bool> Index::do_infix_search(const size_t num_search_fields, const std::v
raw_infix_ids_length = infix_ids.size();
}
if(filter_result_iterator->is_valid) {
if(filter_result_iterator->validity == filter_result_iterator_t::valid) {
filter_result_t result;
filter_result_iterator->and_scalar(raw_infix_ids, raw_infix_ids_length, result);
if(raw_infix_ids != &infix_ids[0]) {
@ -5440,11 +5458,15 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
uint32_t excluded_result_index = 0;
Option<bool>* compute_sort_score_statuses[num_threads];
for(size_t thread_id = 0; thread_id < num_threads && filter_result_iterator->is_valid; thread_id++) {
for(size_t thread_id = 0; thread_id < num_threads &&
filter_result_iterator->validity == filter_result_iterator_t::valid; thread_id++) {
auto batch_result = new filter_result_t();
filter_result_iterator->get_n_ids(window_size, excluded_result_index, exclude_token_ids,
exclude_token_ids_size, batch_result);
if (batch_result->count == 0) {
delete batch_result;
break;
}
num_queued++;
searched_queries.push_back({});
@ -5530,7 +5552,7 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
std::unique_lock<std::mutex> lock_process(m_process);
cv_process.wait(lock_process, [&](){ return num_processed == num_queued; });
search_cutoff = parent_search_cutoff;
search_cutoff = parent_search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
for(size_t thread_id = 0; thread_id < num_processed; thread_id++) {
if (compute_sort_score_statuses[thread_id] != nullptr) {
@ -5558,8 +5580,20 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
std::chrono::high_resolution_clock::now() - beginF).count();
LOG(INFO) << "Time for raw scoring: " << timeMillisF;*/
filter_result_iterator->reset();
all_result_ids_len = filter_result_iterator->to_filter_id_array(all_result_ids);
if (filter_result_iterator->validity == filter_result_iterator_t::timed_out) {
auto partial_result = new filter_result_t();
std::unique_ptr<filter_result_t> partial_result_guard(partial_result);
filter_result_iterator->get_n_ids(window_size * num_processed,
excluded_result_index, nullptr, 0, partial_result, true);
all_result_ids_len = partial_result->count;
all_result_ids = partial_result->docs;
partial_result->docs = nullptr;
} else {
filter_result_iterator->reset();
all_result_ids_len = filter_result_iterator->to_filter_id_array(all_result_ids);
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
}
return Option<bool>(true);
}
@ -5607,7 +5641,8 @@ void Index::populate_sort_mapping(int* sort_order, std::vector<size_t>& geopoint
auto& eval_exp = sort_fields_std[i].eval;
auto count = sort_fields_std[i].eval_expressions.size();
for (uint32_t j = 0; j < count; j++) {
auto filter_result_iterator = filter_result_iterator_t("", this, &eval_exp.filter_trees[j]);
auto filter_result_iterator = filter_result_iterator_t("", this, &eval_exp.filter_trees[j],
search_begin_us, search_stop_us);
auto filter_init_op = filter_result_iterator.init_status();
if (!filter_init_op.ok()) {
return;

View File

@ -209,7 +209,7 @@ bool or_iterator_t::take_id(result_iter_state_t& istate, uint32_t id, bool& is_e
}
if (istate.fit != nullptr && istate.fit->approx_filter_ids_length > 0) {
if (istate.fit->valid(id) == 1) {
if (istate.fit->is_valid(id) == 1) {
istate.fit->next();
return true;
}
@ -261,7 +261,7 @@ bool or_iterator_t::take_id(result_iter_state_t& istate, uint32_t id, bool& is_e
}
if (istate.fit != nullptr && istate.fit->approx_filter_ids_length > 0) {
if (istate.fit->valid(id) == 1) {
if (istate.fit->is_valid(id) == 1) {
filter_result.seq_id = id;
filter_result.reference_filter_results = std::move(istate.fit->reference);
istate.fit->next();

View File

@ -1879,7 +1879,7 @@ bool result_iter_state_t::is_filter_valid() const {
}
if (fit != nullptr) {
return fit->is_valid;
return fit->validity == filter_result_iterator_t::valid;
}
return false;
@ -1890,7 +1890,7 @@ uint32_t result_iter_state_t::get_filter_id() const {
return filter_ids[filter_ids_index];
}
if (fit != nullptr && fit->is_valid) {
if (fit != nullptr && fit->validity == filter_result_iterator_t::valid) {
return fit->seq_id;
}

View File

@ -5,6 +5,7 @@
#include <collection_manager.h>
#include <filter.h>
#include <posting.h>
#include <chrono>
#include "collection.h"
class FilterTest : public ::testing::Test {
@ -65,7 +66,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_null_filter_tree_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_null_filter_tree_test.init_status().ok());
ASSERT_FALSE(iter_null_filter_tree_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_null_filter_tree_test.validity);
Option<bool> filter_op = filter::parse_filter_query("name: foo", coll->get_schema(), store, doc_id_prefix,
filter_tree_root);
@ -74,7 +75,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_no_match_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_no_match_test.init_status().ok());
ASSERT_FALSE(iter_no_match_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_no_match_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -85,7 +86,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_no_match_multi_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_no_match_multi_test.init_status().ok());
ASSERT_FALSE(iter_no_match_multi_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_no_match_multi_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -97,11 +98,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
ASSERT_TRUE(iter_contains_test.init_status().ok());
for (uint32_t i = 0; i < 5; i++) {
ASSERT_TRUE(iter_contains_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_contains_test.validity);
ASSERT_EQ(i, iter_contains_test.seq_id);
iter_contains_test.next();
}
ASSERT_FALSE(iter_contains_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_contains_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -113,11 +114,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
ASSERT_TRUE(iter_contains_multi_test.init_status().ok());
for (uint32_t i = 0; i < 5; i++) {
ASSERT_TRUE(iter_contains_multi_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_contains_multi_test.validity);
ASSERT_EQ(i, iter_contains_multi_test.seq_id);
iter_contains_multi_test.next();
}
ASSERT_FALSE(iter_contains_multi_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_contains_multi_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -129,11 +130,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
ASSERT_TRUE(iter_exact_match_1_test.init_status().ok());
for (uint32_t i = 0; i < 5; i++) {
ASSERT_TRUE(iter_exact_match_1_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_exact_match_1_test.validity);
ASSERT_EQ(i, iter_exact_match_1_test.seq_id);
iter_exact_match_1_test.next();
}
ASSERT_FALSE(iter_exact_match_1_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_exact_match_1_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -143,7 +144,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_exact_match_2_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_exact_match_2_test.init_status().ok());
ASSERT_FALSE(iter_exact_match_2_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_exact_match_2_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -156,11 +157,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
std::vector<int> expected = {0, 2, 3, 4};
for (auto const& i : expected) {
ASSERT_TRUE(iter_exact_match_multi_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_exact_match_multi_test.validity);
ASSERT_EQ(i, iter_exact_match_multi_test.seq_id);
iter_exact_match_multi_test.next();
}
ASSERT_FALSE(iter_exact_match_multi_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_exact_match_multi_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -173,12 +174,12 @@ TEST_F(FilterTest, FilterTreeIterator) {
expected = {1, 3};
for (auto const& i : expected) {
ASSERT_TRUE(iter_not_equals_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_not_equals_test.validity);
ASSERT_EQ(i, iter_not_equals_test.seq_id);
iter_not_equals_test.next();
}
ASSERT_FALSE(iter_not_equals_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_not_equals_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -189,13 +190,13 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_skip_test1 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_skip_test1.init_status().ok());
ASSERT_TRUE(iter_skip_test1.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test1.validity);
iter_skip_test1.skip_to(3);
ASSERT_TRUE(iter_skip_test1.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test1.validity);
ASSERT_EQ(4, iter_skip_test1.seq_id);
iter_skip_test1.next();
ASSERT_FALSE(iter_skip_test1.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_skip_test1.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -206,9 +207,9 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_skip_test2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_skip_test2.init_status().ok());
ASSERT_TRUE(iter_skip_test2.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test2.validity);
iter_skip_test2.skip_to(3);
ASSERT_FALSE(iter_skip_test2.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_skip_test2.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -219,11 +220,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_and_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_and_test.init_status().ok());
ASSERT_TRUE(iter_and_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_and_test.validity);
ASSERT_EQ(1, iter_and_test.seq_id);
iter_and_test.next();
ASSERT_FALSE(iter_and_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_and_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -247,12 +248,12 @@ TEST_F(FilterTest, FilterTreeIterator) {
expected = {2, 4, 5};
for (auto const& i : expected) {
ASSERT_TRUE(iter_or_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_or_test.validity);
ASSERT_EQ(i, iter_or_test.seq_id);
iter_or_test.next();
}
ASSERT_FALSE(iter_or_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_or_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -263,17 +264,17 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_skip_complex_filter_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_skip_complex_filter_test.init_status().ok());
ASSERT_TRUE(iter_skip_complex_filter_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_complex_filter_test.validity);
iter_skip_complex_filter_test.skip_to(4);
expected = {4, 5};
for (auto const& i : expected) {
ASSERT_TRUE(iter_skip_complex_filter_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_complex_filter_test.validity);
ASSERT_EQ(i, iter_skip_complex_filter_test.seq_id);
iter_skip_complex_filter_test.next();
}
ASSERT_FALSE(iter_skip_complex_filter_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_skip_complex_filter_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -287,7 +288,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
std::vector<int> validate_ids = {0, 1, 2, 3, 4, 5, 6}, seq_ids = {0, 2, 2, 4, 4, 5, 5};
expected = {1, 0, 1, 0, 1, 1, -1};
for (uint32_t i = 0; i < validate_ids.size(); i++) {
ASSERT_EQ(expected[i], iter_validate_ids_test1.valid(validate_ids[i]));
ASSERT_EQ(expected[i], iter_validate_ids_test1.is_valid(validate_ids[i]));
ASSERT_EQ(seq_ids[i], iter_validate_ids_test1.seq_id);
}
@ -303,7 +304,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
validate_ids = {0, 1, 2, 3, 4, 5, 6}, seq_ids = {1, 1, 5, 5, 5, 5, 5};
expected = {0, 1, 0, 0, 0, 1, -1};
for (uint32_t i = 0; i < validate_ids.size(); i++) {
ASSERT_EQ(expected[i], iter_validate_ids_test2.valid(validate_ids[i]));
ASSERT_EQ(expected[i], iter_validate_ids_test2.is_valid(validate_ids[i]));
ASSERT_EQ(seq_ids[i], iter_validate_ids_test2.seq_id);
}
@ -319,7 +320,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
validate_ids = {0, 1, 2, 3, 4, 5, 6}, seq_ids = {0, 3, 3, 4, 4, 4, 4};
expected = {1, 0, 0, 0, 1, -1, -1};
for (uint32_t i = 0; i < validate_ids.size(); i++) {
ASSERT_EQ(expected[i], iter_validate_ids_test3.valid(validate_ids[i]));
ASSERT_EQ(expected[i], iter_validate_ids_test3.is_valid(validate_ids[i]));
ASSERT_EQ(seq_ids[i], iter_validate_ids_test3.seq_id);
}
@ -337,7 +338,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
seq_ids = {1, 1, 3, 3, 5, 5, 5};
expected = {0, 1, 0, 1, 0, 1, -1};
for (uint32_t i = 0; i < validate_ids.size(); i++) {
ASSERT_EQ(expected[i], iter_validate_ids_not_equals_filter_test.valid(validate_ids[i]));
ASSERT_EQ(expected[i], iter_validate_ids_not_equals_filter_test.is_valid(validate_ids[i]));
ASSERT_EQ(seq_ids[i], iter_validate_ids_not_equals_filter_test.seq_id);
}
@ -406,20 +407,20 @@ TEST_F(FilterTest, FilterTreeIterator) {
expected = {0, 2, 3, 4};
for (auto const& i : expected) {
ASSERT_TRUE(iter_reset_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_reset_test.validity);
ASSERT_EQ(i, iter_reset_test.seq_id);
iter_reset_test.next();
}
ASSERT_FALSE(iter_reset_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_reset_test.validity);
iter_reset_test.reset();
for (auto const& i : expected) {
ASSERT_TRUE(iter_reset_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_reset_test.validity);
ASSERT_EQ(i, iter_reset_test.seq_id);
iter_reset_test.next();
}
ASSERT_FALSE(iter_reset_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_reset_test.validity);
auto iter_move_assignment_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
@ -428,11 +429,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
expected = {0, 2, 3, 4};
for (auto const& i : expected) {
ASSERT_TRUE(iter_move_assignment_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_move_assignment_test.validity);
ASSERT_EQ(i, iter_move_assignment_test.seq_id);
iter_move_assignment_test.next();
}
ASSERT_FALSE(iter_move_assignment_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_move_assignment_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -453,7 +454,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
for (uint32_t i = 0; i < filter_ids_length; i++) {
ASSERT_EQ(expected[i], filter_ids[i]);
}
ASSERT_FALSE(iter_to_array_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_to_array_test.validity);
delete[] filter_ids;
@ -470,7 +471,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
for (uint32_t i = 0; i < and_result_length; i++) {
ASSERT_EQ(expected[i], and_result[i]);
}
ASSERT_FALSE(iter_and_scalar_test.is_valid);
ASSERT_EQ(filter_result_iterator_t::invalid, iter_and_scalar_test.validity);
delete[] and_result;
delete filter_tree_root;
@ -493,11 +494,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_skip_test3 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_skip_test3.init_status().ok());
ASSERT_TRUE(iter_skip_test3.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test3.validity);
iter_skip_test3.skip_to(4);
ASSERT_EQ(4, iter_skip_test3.seq_id);
ASSERT_TRUE(iter_skip_test3.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test3.validity);
delete filter_tree_root;
@ -509,10 +510,10 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_skip_test4 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_skip_test4.init_status().ok());
ASSERT_TRUE(iter_skip_test4.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test4.validity);
iter_skip_test4.skip_to(6);
ASSERT_EQ(6, iter_skip_test4.seq_id);
ASSERT_TRUE(iter_skip_test4.is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test4.validity);
auto iter_add_phrase_ids_test = new filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
std::unique_ptr<filter_result_iterator_t> filter_iter_guard(iter_add_phrase_ids_test);
@ -526,8 +527,56 @@ TEST_F(FilterTest, FilterTreeIterator) {
filter_iter_guard.release();
filter_iter_guard.reset(iter_add_phrase_ids_test);
ASSERT_TRUE(iter_add_phrase_ids_test->is_valid);
ASSERT_EQ(filter_result_iterator_t::valid, iter_add_phrase_ids_test->validity);
ASSERT_EQ(6, iter_add_phrase_ids_test->seq_id);
delete filter_tree_root;
}
}
// Exercises the search cutoff of a filter_result_iterator_t built from a plain id list:
//   1. before the deadline the iterator walks all ids and `reset()` makes it valid again;
//   2. once the deadline has elapsed the iterator flips to `timed_out` and `reset()` does not revive it;
//   3. `get_n_ids` yields nothing on a timed out iterator unless `override_timeout` is passed as true.
TEST_F(FilterTest, FilterTreeIteratorTimeout) {
    const uint32_t count = 20;
    // NOTE(review): the test never frees `filter_ids`; presumably the iterator takes ownership — confirm.
    auto filter_ids = new uint32_t[count];
    for (uint32_t i = 0; i < count; i++) {
        filter_ids[i] = i;
    }

    auto filter_iterator = new filter_result_iterator_t(filter_ids, count,
                                                        std::chrono::duration_cast<std::chrono::microseconds>(
                                                                std::chrono::system_clock::now().time_since_epoch()).count(),
                                                        10000000); // Timeout after 10 seconds
    std::unique_ptr<filter_result_iterator_t> filter_iter_guard(filter_iterator);

    ASSERT_EQ(filter_result_iterator_t::valid, filter_iterator->validity);

    // Half of the time budget is consumed here, so a full pass must still succeed.
    std::this_thread::sleep_for(std::chrono::seconds(5));
    for (uint32_t i = 0; i < count; i++) {
        ASSERT_EQ(filter_result_iterator_t::valid, filter_iterator->validity);
        filter_iterator->next();
    }
    ASSERT_EQ(filter_result_iterator_t::invalid, filter_iterator->validity); // End of iterator reached.

    filter_iterator->reset();
    ASSERT_EQ(filter_result_iterator_t::valid, filter_iterator->validity);

    // The remaining budget is consumed here; the deadline has passed once this sleep returns.
    std::this_thread::sleep_for(std::chrono::seconds(5));
    // NOTE(review): the iterator stays valid for these 9 calls, presumably because the deadline is only
    // polled every `function_call_modulo` calls (10 under TEST_BUILD) — confirm against is_timed_out().
    for (auto i = 0; i < 9; i++) {
        ASSERT_EQ(filter_result_iterator_t::valid, filter_iterator->validity);
        filter_iterator->next();
    }
    ASSERT_EQ(filter_result_iterator_t::timed_out, filter_iterator->validity);

    filter_iterator->reset();
    ASSERT_EQ(filter_result_iterator_t::timed_out, filter_iterator->validity); // Resetting won't help with timeout.

    uint32_t excluded_result_index = 0;
    {
        // `get_n_ids` needs a pointer lvalue, so the raw pointer is kept next to its guard. The guard
        // releases the result even when a failing ASSERT_* returns early from the test body.
        auto result = new filter_result_t();
        std::unique_ptr<filter_result_t> result_guard(result);
        filter_iterator->get_n_ids(count, excluded_result_index, nullptr, 0, result);
        ASSERT_EQ(0, result->count); // Shouldn't return results
    }

    {
        auto result = new filter_result_t();
        std::unique_ptr<filter_result_t> result_guard(result);
        filter_iterator->get_n_ids(count, excluded_result_index, nullptr, 0, result, true);
        ASSERT_EQ(count, result->count); // With `override_timeout` true, we should get result.
    }
}