mirror of
https://github.com/typesense/typesense.git
synced 2025-05-20 13:42:26 +08:00
Merge pull request #1394 from happy-san/filtering_cutoff
Filtering cutoff
This commit is contained in:
commit
b3f8954229
@ -4,6 +4,7 @@
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "option.h"
|
||||
#include "posting_list.h"
|
||||
|
||||
@ -144,6 +145,20 @@ struct filter_result_t {
|
||||
static void copy_references(const filter_result_t& from, filter_result_t& to);
|
||||
};
|
||||
|
||||
/// Threshold that `filter_result_iterator_timeout_info::function_call_counter` is measured against by the
/// timeout logic. `get_n_ids()` primes the counter to `function_call_modulo - 1` when asked for more than
/// `function_call_modulo` ids, because it is called too few times for the counter to get there on its own.
/// Test builds use a much smaller value (10 instead of 16384).
/// NOTE(review): the body of `is_timed_out()` is not visible here -- presumably the clock is only consulted
/// once every `function_call_modulo` calls; confirm against the implementation.
#ifdef TEST_BUILD
|
||||
constexpr uint16_t function_call_modulo = 10;
|
||||
#else
|
||||
constexpr uint16_t function_call_modulo = 16384;
|
||||
#endif
|
||||
|
||||
/// Timeout bookkeeping owned by a `filter_result_iterator_t` through its `timeout_info` member.
/// The iterator constructors only allocate it when a `search_stop_us` other than `UINT64_MAX` is supplied,
/// and the tree-building constructor does not forward the timeout arguments to its sub-nodes.
struct filter_result_iterator_timeout_info {
|
||||
/// Stores the search timing parameters (definition not shown in this view).
filter_result_iterator_timeout_info(uint64_t search_begin_us, uint64_t search_stop_us);
|
||||
|
||||
/// Call counter used by the timeout logic together with `function_call_modulo`.
uint16_t function_call_counter = 0;
|
||||
/// NOTE(review): presumably the search start timestamp in microseconds -- confirm against `is_timed_out()`.
uint64_t search_begin_us = 0;
|
||||
/// NOTE(review): presumably the allowed search duration in microseconds. `UINT64_MAX` is the value the
/// iterator constructors treat as "no timeout requested".
uint64_t search_stop_us = UINT64_MAX;
|
||||
};
|
||||
|
||||
class filter_result_iterator_t {
|
||||
private:
|
||||
std::string collection_name;
|
||||
@ -170,6 +185,8 @@ private:
|
||||
|
||||
bool delete_filter_node = false;
|
||||
|
||||
std::unique_ptr<filter_result_iterator_timeout_info> timeout_info;
|
||||
|
||||
/// Initializes the state of iterator node after its creation.
|
||||
void init();
|
||||
|
||||
@ -192,15 +209,18 @@ private:
|
||||
|
||||
/// Collects n doc ids while advancing the iterator. The iterator may become invalid during this operation.
|
||||
/// **The references are moved from filter_result_iterator_t.
|
||||
void get_n_ids(const uint32_t& n, filter_result_t*& result);
|
||||
void get_n_ids(const uint32_t& n, filter_result_t*& result, const bool& override_timeout = false);
|
||||
|
||||
/// Updates `validity` of the iterator to `timed_out` if condition is met. Assumes `timeout_info` is not null.
|
||||
inline bool is_timed_out();
|
||||
|
||||
public:
|
||||
uint32_t seq_id = 0;
|
||||
/// Collection name -> references
|
||||
std::map<std::string, reference_filter_result_t> reference;
|
||||
|
||||
/// Set to false when this iterator or its subtree becomes invalid.
|
||||
bool is_valid = true;
|
||||
/// In case of a complex filter query, validity of a node is dependent on its sub-nodes.
|
||||
enum {timed_out = -1, invalid, valid} validity = valid;
|
||||
|
||||
/// Initialization status of the iterator.
|
||||
Option<bool> status = Option(true);
|
||||
@ -212,10 +232,12 @@ public:
|
||||
|
||||
filter_result_iterator_t() = default;
|
||||
|
||||
explicit filter_result_iterator_t(uint32_t* ids, const uint32_t& ids_count);
|
||||
explicit filter_result_iterator_t(uint32_t* ids, const uint32_t& ids_count,
|
||||
uint64_t search_begin_us = 0, uint64_t search_stop_us = UINT64_MAX);
|
||||
|
||||
explicit filter_result_iterator_t(const std::string collection_name,
|
||||
Index const* const index, filter_node_t const* const filter_node);
|
||||
explicit filter_result_iterator_t(const std::string& collection_name,
|
||||
Index const* const index, filter_node_t const* const filter_node,
|
||||
uint64_t search_begin_us = 0, uint64_t search_stop_us = UINT64_MAX);
|
||||
|
||||
~filter_result_iterator_t();
|
||||
|
||||
@ -230,10 +252,10 @@ public:
|
||||
/// Returns a tri-state:
|
||||
/// 0: id is not valid
|
||||
/// 1: id is valid
|
||||
/// -1: end of iterator
|
||||
/// -1: end of iterator / timed out
|
||||
///
|
||||
/// Handles moving the individual iterators internally.
|
||||
[[nodiscard]] int valid(uint32_t id);
|
||||
[[nodiscard]] int is_valid(uint32_t id);
|
||||
|
||||
/// Advances the iterator to get the next value of doc and reference. The iterator may become invalid during this
|
||||
/// operation.
|
||||
@ -244,17 +266,17 @@ public:
|
||||
void get_n_ids(const uint32_t& n,
|
||||
uint32_t& excluded_result_index,
|
||||
uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size,
|
||||
filter_result_t*& result);
|
||||
filter_result_t*& result, const bool& override_timeout = false);
|
||||
|
||||
/// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during
|
||||
/// this operation.
|
||||
void skip_to(uint32_t id);
|
||||
void skip_to(uint32_t id, const bool& override_timeout = false);
|
||||
|
||||
/// Returns true if at least one id from the posting list object matches the filter.
|
||||
bool contains_atleast_one(const void* obj);
|
||||
|
||||
/// Returns to the initial state of the iterator.
|
||||
void reset();
|
||||
void reset(const bool& override_timeout = false);
|
||||
|
||||
/// Iterates and collects all the filter ids into filter_array.
|
||||
/// \return size of the filter array
|
||||
|
@ -300,7 +300,7 @@ public:
|
||||
}
|
||||
|
||||
filter_result_iterator->reset();
|
||||
return filter_result_iterator->valid(id) == 1;
|
||||
return filter_result_iterator->is_valid(id) == 1;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1017,7 +1017,8 @@ bool validate_and_add_leaf(art_leaf* leaf,
|
||||
}
|
||||
|
||||
if(prev_token.empty() || !prev_leaf) {
|
||||
if (filter_result_iterator->is_valid && !filter_result_iterator->contains_atleast_one(leaf->values)) {
|
||||
if (filter_result_iterator->validity == filter_result_iterator_t::valid &&
|
||||
!filter_result_iterator->contains_atleast_one(leaf->values)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
@ -1177,8 +1178,9 @@ int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_r
|
||||
exclude_leaves, results);
|
||||
filter_result_iterator->reset();
|
||||
|
||||
if (++num_processed % 1024 == 0 && (microseconds(
|
||||
std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
|
||||
if (filter_result_iterator->validity == filter_result_iterator_t::timed_out ||
|
||||
(++num_processed % 1024 == 0 && (microseconds(
|
||||
std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us)) {
|
||||
search_cutoff = true;
|
||||
break;
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
#include <id_list.h>
|
||||
#include <s2/s2point.h>
|
||||
@ -192,19 +193,19 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::and_filter_iterators() {
|
||||
while (left_it->is_valid && right_it->is_valid) {
|
||||
while (left_it->validity && right_it->validity) {
|
||||
while (left_it->seq_id < right_it->seq_id) {
|
||||
left_it->skip_to(right_it->seq_id);
|
||||
if (!left_it->is_valid) {
|
||||
is_valid = false;
|
||||
if (!left_it->validity) {
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
while (left_it->seq_id > right_it->seq_id) {
|
||||
right_it->skip_to(left_it->seq_id);
|
||||
if (!right_it->is_valid) {
|
||||
is_valid = false;
|
||||
if (!right_it->validity) {
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -224,11 +225,11 @@ void filter_result_iterator_t::and_filter_iterators() {
|
||||
}
|
||||
}
|
||||
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::or_filter_iterators() {
|
||||
if (left_it->is_valid && right_it->is_valid) {
|
||||
if (left_it->validity && right_it->validity) {
|
||||
if (left_it->seq_id < right_it->seq_id) {
|
||||
seq_id = left_it->seq_id;
|
||||
reference.clear();
|
||||
@ -264,7 +265,7 @@ void filter_result_iterator_t::or_filter_iterators() {
|
||||
return;
|
||||
}
|
||||
|
||||
if (left_it->is_valid) {
|
||||
if (left_it->validity) {
|
||||
seq_id = left_it->seq_id;
|
||||
reference.clear();
|
||||
|
||||
@ -275,7 +276,7 @@ void filter_result_iterator_t::or_filter_iterators() {
|
||||
return;
|
||||
}
|
||||
|
||||
if (right_it->is_valid) {
|
||||
if (right_it->validity) {
|
||||
seq_id = right_it->seq_id;
|
||||
reference.clear();
|
||||
|
||||
@ -286,7 +287,7 @@ void filter_result_iterator_t::or_filter_iterators() {
|
||||
return;
|
||||
}
|
||||
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::advance_string_filter_token_iterators() {
|
||||
@ -407,18 +408,22 @@ void filter_result_iterator_t::get_string_filter_next_match(const bool& field_is
|
||||
seq_id = lowest_id;
|
||||
}
|
||||
|
||||
is_valid = one_is_valid;
|
||||
validity = one_is_valid ? valid : invalid;
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::next() {
|
||||
if (!is_valid) {
|
||||
if (validity != valid) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (timeout_info != nullptr && is_timed_out()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// No need to traverse iterator tree if there's only one filter or compute_result() has been called.
|
||||
if (is_filter_result_initialized) {
|
||||
if (++result_index >= filter_result.count) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -457,7 +462,7 @@ void filter_result_iterator_t::next() {
|
||||
const filter a_filter = filter_node->filter_exp;
|
||||
|
||||
if (!index->field_is_indexed(a_filter.field_name)) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -474,15 +479,15 @@ void filter_result_iterator_t::next() {
|
||||
previous_match = seq_id;
|
||||
advance_string_filter_token_iterators();
|
||||
get_string_filter_next_match(f.is_array());
|
||||
} while (is_valid && previous_match + 1 == seq_id);
|
||||
} while (validity && previous_match + 1 == seq_id);
|
||||
|
||||
if (!is_valid) {
|
||||
if (!validity) {
|
||||
// We've reached the end of the index, no possible matches pending.
|
||||
if (previous_match >= index->seq_ids->last_id()) {
|
||||
return;
|
||||
}
|
||||
|
||||
is_valid = true;
|
||||
validity = valid;
|
||||
result_index = index->seq_ids->last_id() + 1;
|
||||
seq_id = previous_match + 1;
|
||||
return;
|
||||
@ -540,8 +545,8 @@ void filter_result_iterator_t::get_string_filter_first_match(const bool& field_i
|
||||
|
||||
if (filter_node->filter_exp.apply_not_equals) {
|
||||
// filter didn't match any id. So by applying not equals, every id in the index is a match.
|
||||
if (!is_valid) {
|
||||
is_valid = true;
|
||||
if (!validity) {
|
||||
validity = valid;
|
||||
seq_id = 0;
|
||||
result_index = index->seq_ids->last_id() + 1;
|
||||
return;
|
||||
@ -560,15 +565,15 @@ void filter_result_iterator_t::get_string_filter_first_match(const bool& field_i
|
||||
previous_match = seq_id;
|
||||
advance_string_filter_token_iterators();
|
||||
get_string_filter_next_match(field_is_array);
|
||||
} while (is_valid && previous_match + 1 == seq_id);
|
||||
} while (validity && previous_match + 1 == seq_id);
|
||||
|
||||
if (!is_valid) {
|
||||
if (!validity) {
|
||||
// filter matched all the ids in the index. So for not equals, there's no match.
|
||||
if (previous_match >= index->seq_ids->last_id()) {
|
||||
return;
|
||||
}
|
||||
|
||||
is_valid = true;
|
||||
validity = valid;
|
||||
result_index = index->seq_ids->last_id() + 1;
|
||||
seq_id = previous_match + 1;
|
||||
return;
|
||||
@ -611,7 +616,7 @@ void filter_result_iterator_t::init() {
|
||||
auto ref_collection = cm.get_collection(ref_collection_name);
|
||||
if (ref_collection == nullptr) {
|
||||
status = Option<bool>(400, "Referenced collection `" + ref_collection_name + "` not found.");
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -620,7 +625,7 @@ void filter_result_iterator_t::init() {
|
||||
has_reference = ref_collection->is_referenced_in(collection_name);
|
||||
if (!is_referenced && !has_reference) {
|
||||
status = Option<bool>(400, "Failed to join on `" + ref_collection_name + "`: No reference field found.");
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -632,7 +637,7 @@ void filter_result_iterator_t::init() {
|
||||
if (!reference_filter_op.ok()) {
|
||||
status = Option<bool>(400, "Failed to join on `" + a_filter.referenced_collection_name
|
||||
+ "` collection: " + reference_filter_op.error());
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
} else if (has_reference) {
|
||||
@ -643,14 +648,14 @@ void filter_result_iterator_t::init() {
|
||||
if (!reference_filter_op.ok()) {
|
||||
status = Option<bool>(400, "Failed to join on `" + a_filter.referenced_collection_name
|
||||
+ "` collection: " + reference_filter_op.error());
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
auto get_reference_field_op = ref_collection->get_referenced_in_field_with_lock(collection_name);
|
||||
if (!get_reference_field_op.ok()) {
|
||||
status = Option<bool>(get_reference_field_op.code(), get_reference_field_op.error());
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -669,7 +674,7 @@ void filter_result_iterator_t::init() {
|
||||
auto filter_init_op = fit.init_status();
|
||||
if (!filter_init_op.ok()) {
|
||||
status = Option<bool>(filter_init_op.code(), filter_init_op.error());
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -677,7 +682,7 @@ void filter_result_iterator_t::init() {
|
||||
}
|
||||
|
||||
if (filter_result.count == 0) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -711,7 +716,7 @@ void filter_result_iterator_t::init() {
|
||||
}
|
||||
|
||||
if (filter_result.count == 0) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -723,7 +728,7 @@ void filter_result_iterator_t::init() {
|
||||
|
||||
if (!index->field_is_indexed(a_filter.field_name)) {
|
||||
status = Option<bool>(400, "Cannot filter on non-indexed field `" + a_filter.field_name + "`.");
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -794,7 +799,7 @@ void filter_result_iterator_t::init() {
|
||||
}
|
||||
|
||||
if (filter_result.count == 0) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -869,7 +874,7 @@ void filter_result_iterator_t::init() {
|
||||
}
|
||||
|
||||
if (filter_result.count == 0) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -930,7 +935,7 @@ void filter_result_iterator_t::init() {
|
||||
}
|
||||
|
||||
if (filter_result.count == 0) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -979,7 +984,7 @@ void filter_result_iterator_t::init() {
|
||||
status = Option<bool>(400, "Polygon" + (a_filter.values.size() > 1 ?
|
||||
" at position " + std::to_string(fi + 1) : "")
|
||||
+ " is invalid: " + error.text());
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
} else {
|
||||
query_region = loop;
|
||||
@ -1081,7 +1086,7 @@ void filter_result_iterator_t::init() {
|
||||
}
|
||||
|
||||
if (filter_result.count == 0) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1142,8 +1147,8 @@ void filter_result_iterator_t::init() {
|
||||
}
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::skip_to(uint32_t id) {
|
||||
if (!is_valid) {
|
||||
void filter_result_iterator_t::skip_to(uint32_t id, const bool& override_timeout) {
|
||||
if (validity == invalid || (!override_timeout && timeout_info != nullptr && is_timed_out())) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1152,7 +1157,7 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
|
||||
ArrayUtils::skip_index_to_id(result_index, filter_result.docs, filter_result.count, id);
|
||||
|
||||
if (result_index >= filter_result.count) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1183,7 +1188,7 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
|
||||
const filter a_filter = filter_node->filter_exp;
|
||||
|
||||
if (!index->field_is_indexed(a_filter.field_name)) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1209,16 +1214,16 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
|
||||
previous_match = seq_id;
|
||||
advance_string_filter_token_iterators();
|
||||
get_string_filter_next_match(f.is_array());
|
||||
} while (is_valid && previous_match + 1 == seq_id);
|
||||
} while (is_valid && seq_id <= id);
|
||||
} while (validity && previous_match + 1 == seq_id);
|
||||
} while (validity && seq_id <= id);
|
||||
|
||||
if (!is_valid) {
|
||||
if (!validity) {
|
||||
// filter matched all the ids in the index. So for not equals, there's no match.
|
||||
if (previous_match >= index->seq_ids->last_id()) {
|
||||
return;
|
||||
}
|
||||
|
||||
is_valid = true;
|
||||
validity = valid;
|
||||
seq_id = previous_match + 1;
|
||||
result_index = index->seq_ids->last_id() + 1;
|
||||
|
||||
@ -1257,33 +1262,38 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
|
||||
}
|
||||
}
|
||||
|
||||
int filter_result_iterator_t::valid(uint32_t id) {
|
||||
if (!is_valid) {
|
||||
int filter_result_iterator_t::is_valid(uint32_t id) {
|
||||
if (validity != valid) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// No need to traverse iterator tree if there's only one filter or compute_result() has been called.
|
||||
if (is_filter_result_initialized) {
|
||||
skip_to(id);
|
||||
return is_valid ? (seq_id == id ? 1 : 0) : -1;
|
||||
return validity ? (seq_id == id ? 1 : 0) : -1;
|
||||
}
|
||||
|
||||
if (timeout_info != nullptr && is_timed_out()) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (filter_node->isOperator) {
|
||||
auto left_valid = left_it->valid(id), right_valid = right_it->valid(id);
|
||||
// We need to consider only the valid/invalid state since child nodes can never time out.
|
||||
auto left_validity = left_it->is_valid(id), right_validity = right_it->is_valid(id);
|
||||
|
||||
if (filter_node->filter_operator == AND) {
|
||||
is_valid = left_it->is_valid && right_it->is_valid;
|
||||
validity = (left_it->validity == valid && right_it->validity == valid) ? valid : invalid;
|
||||
|
||||
if (left_valid < 1 || right_valid < 1) {
|
||||
if (left_valid == -1 || right_valid == -1) {
|
||||
if (left_validity < 1 || right_validity < 1) {
|
||||
if (left_validity == -1 || right_validity == -1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// id did not match the filter but both of the sub-iterators are still valid.
|
||||
// Updating seq_id to the next potential match.
|
||||
if (left_valid == 0 && right_valid == 0) {
|
||||
if (left_validity == 0 && right_validity == 0) {
|
||||
seq_id = std::max(left_it->seq_id, right_it->seq_id);
|
||||
} else if (left_valid == 0) {
|
||||
} else if (left_validity == 0) {
|
||||
seq_id = left_it->seq_id;
|
||||
} else {
|
||||
seq_id = right_it->seq_id;
|
||||
@ -1295,18 +1305,18 @@ int filter_result_iterator_t::valid(uint32_t id) {
|
||||
seq_id = id;
|
||||
return 1;
|
||||
} else {
|
||||
is_valid = left_it->is_valid || right_it->is_valid;
|
||||
validity = (left_it->validity == valid || right_it->validity == valid) ? valid : invalid;
|
||||
|
||||
if (left_valid < 1 && right_valid < 1) {
|
||||
if (left_valid == -1 && right_valid == -1) {
|
||||
if (left_validity < 1 && right_validity < 1) {
|
||||
if (left_validity == -1 && right_validity == -1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// id did not match the filter; both of the sub-iterators or one of them might be valid.
|
||||
// Updating seq_id to the next match.
|
||||
if (left_valid == 0 && right_valid == 0) {
|
||||
if (left_validity == 0 && right_validity == 0) {
|
||||
seq_id = std::min(left_it->seq_id, right_it->seq_id);
|
||||
} else if (left_valid == 0) {
|
||||
} else if (left_validity == 0) {
|
||||
seq_id = left_it->seq_id;
|
||||
} else {
|
||||
seq_id = right_it->seq_id;
|
||||
@ -1321,7 +1331,7 @@ int filter_result_iterator_t::valid(uint32_t id) {
|
||||
}
|
||||
|
||||
skip_to(id);
|
||||
return is_valid ? (seq_id == id ? 1 : 0) : -1;
|
||||
return validity ? (seq_id == id ? 1 : 0) : -1;
|
||||
}
|
||||
|
||||
Option<bool> filter_result_iterator_t::init_status() {
|
||||
@ -1339,7 +1349,7 @@ bool filter_result_iterator_t::contains_atleast_one(const void *obj) {
|
||||
compact_posting_list_t* list = COMPACT_POSTING_PTR(obj);
|
||||
|
||||
size_t i = 0;
|
||||
while(i < list->length && is_valid) {
|
||||
while(i < list->length && validity == valid) {
|
||||
size_t num_existing_offsets = list->id_offsets[i];
|
||||
size_t existing_id = list->id_offsets[i + num_existing_offsets + 1];
|
||||
|
||||
@ -1358,7 +1368,7 @@ bool filter_result_iterator_t::contains_atleast_one(const void *obj) {
|
||||
auto list = (posting_list_t*)(obj);
|
||||
posting_list_t::iterator_t it = list->new_iterator();
|
||||
|
||||
while(it.valid() && is_valid) {
|
||||
while(it.valid() && validity == valid) {
|
||||
uint32_t id = it.id();
|
||||
|
||||
if(id == seq_id) {
|
||||
@ -1376,15 +1386,19 @@ bool filter_result_iterator_t::contains_atleast_one(const void *obj) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::reset() {
|
||||
void filter_result_iterator_t::reset(const bool& override_timeout) {
|
||||
if (filter_node == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!override_timeout && timeout_info != nullptr && is_timed_out()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// No need to traverse iterator tree if there's only one filter or compute_result() has been called.
|
||||
if (is_filter_result_initialized) {
|
||||
if (filter_result.count == 0) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1397,7 +1411,7 @@ void filter_result_iterator_t::reset() {
|
||||
reference.insert(ref.begin(), ref.end());
|
||||
}
|
||||
|
||||
is_valid = true;
|
||||
validity = valid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1405,7 +1419,7 @@ void filter_result_iterator_t::reset() {
|
||||
// Reset the subtrees then apply operators to arrive at the first valid doc.
|
||||
left_it->reset();
|
||||
right_it->reset();
|
||||
is_valid = true;
|
||||
validity = valid;
|
||||
|
||||
if (filter_node->filter_operator == AND) {
|
||||
and_filter_iterators();
|
||||
@ -1440,7 +1454,7 @@ void filter_result_iterator_t::reset() {
|
||||
}
|
||||
|
||||
uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) {
|
||||
if (!is_valid) {
|
||||
if (validity != valid) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1454,7 +1468,7 @@ uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) {
|
||||
do {
|
||||
filter_ids.push_back(seq_id);
|
||||
next();
|
||||
} while (is_valid);
|
||||
} while (validity == valid);
|
||||
|
||||
filter_array = new uint32_t[filter_ids.size()];
|
||||
std::copy(filter_ids.begin(), filter_ids.end(), filter_array);
|
||||
@ -1463,7 +1477,7 @@ uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) {
|
||||
}
|
||||
|
||||
uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& lenA, uint32_t*& results) {
|
||||
if (!is_valid) {
|
||||
if (validity != valid) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1473,7 +1487,7 @@ uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t&
|
||||
|
||||
std::vector<uint32_t> filter_ids;
|
||||
for (uint32_t i = 0; i < lenA; i++) {
|
||||
auto result = valid(A[i]);
|
||||
auto result = is_valid(A[i]);
|
||||
|
||||
if (result == -1) {
|
||||
break;
|
||||
@ -1495,7 +1509,7 @@ uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t&
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& lenA, filter_result_t& result) {
|
||||
if (!is_valid) {
|
||||
if (validity != valid) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1507,7 +1521,7 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
|
||||
|
||||
std::vector<uint32_t> filter_ids;
|
||||
for (uint32_t i = 0; i < lenA; i++) {
|
||||
auto _result = valid(A[i]);
|
||||
auto _result = is_valid(A[i]);
|
||||
|
||||
if (_result == -1) {
|
||||
break;
|
||||
@ -1534,7 +1548,7 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
|
||||
|
||||
std::vector<uint32_t> match_indexes;
|
||||
for (uint32_t i = 0; i < lenA; i++) {
|
||||
auto _result = valid(A[i]);
|
||||
auto _result = is_valid(A[i]);
|
||||
|
||||
if (_result == -1) {
|
||||
break;
|
||||
@ -1559,16 +1573,22 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
|
||||
}
|
||||
}
|
||||
|
||||
filter_result_iterator_t::filter_result_iterator_t(const std::string collection_name, const Index *const index,
|
||||
const filter_node_t *const filter_node) :
|
||||
filter_result_iterator_t::filter_result_iterator_t(const std::string& collection_name, const Index *const index,
|
||||
const filter_node_t *const filter_node,
|
||||
uint64_t search_begin, uint64_t search_stop) :
|
||||
collection_name(collection_name),
|
||||
index(index),
|
||||
filter_node(filter_node) {
|
||||
if (filter_node == nullptr) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
// Only initialize timeout_info in the root node. We won't pass search_begin/search_stop parameters to the sub-nodes.
|
||||
if (search_stop != UINT64_MAX) {
|
||||
timeout_info = std::make_unique<filter_result_iterator_timeout_info>(search_begin, search_stop);
|
||||
}
|
||||
|
||||
// Generate the iterator tree and then initialize each node.
|
||||
if (filter_node->isOperator) {
|
||||
left_it = new filter_result_iterator_t(collection_name, index, filter_node->left);
|
||||
@ -1577,7 +1597,7 @@ filter_result_iterator_t::filter_result_iterator_t(const std::string collection_
|
||||
|
||||
init();
|
||||
|
||||
if (!is_valid) {
|
||||
if (!validity) {
|
||||
this->approx_filter_ids_length = 0;
|
||||
}
|
||||
}
|
||||
@ -1625,7 +1645,7 @@ filter_result_iterator_t& filter_result_iterator_t::operator=(filter_result_iter
|
||||
posting_list_iterators = std::move(obj.posting_list_iterators);
|
||||
expanded_plists = std::move(obj.expanded_plists);
|
||||
|
||||
is_valid = obj.is_valid;
|
||||
validity = obj.validity;
|
||||
|
||||
seq_id = obj.seq_id;
|
||||
reference = std::move(obj.reference);
|
||||
@ -1637,11 +1657,25 @@ filter_result_iterator_t& filter_result_iterator_t::operator=(filter_result_iter
|
||||
return *this;
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t*& result) {
|
||||
void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t*& result, const bool& override_timeout) {
|
||||
if (!is_filter_result_initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (override_timeout) {
|
||||
result_index = 0;
|
||||
} else if (timeout_info != nullptr) {
|
||||
// In Index::search_wildcard number of calls to get_n_ids will be min(number of threads, filter match ids).
|
||||
// Therefore, `timeout_info->function_call_counter` won't reach `function_call_modulo` if only incremented on
|
||||
// function call.
|
||||
if (n > function_call_modulo) {
|
||||
timeout_info->function_call_counter = function_call_modulo - 1;
|
||||
}
|
||||
if (is_timed_out()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
auto result_length = result->count = std::min(n, filter_result.count - result_index);
|
||||
result->docs = new uint32_t[result_length];
|
||||
if (filter_result.coll_to_references != nullptr) {
|
||||
@ -1661,16 +1695,18 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t*& re
|
||||
result_reference = std::move(filter_result.coll_to_references[result_index]);
|
||||
}
|
||||
|
||||
is_valid = result_index < filter_result.count;
|
||||
if (!override_timeout) {
|
||||
validity = result_index < filter_result.count ? valid : invalid;
|
||||
}
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::get_n_ids(const uint32_t& n,
|
||||
uint32_t& excluded_result_index,
|
||||
uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size,
|
||||
filter_result_t*& result) {
|
||||
filter_result_t*& result, const bool& override_timeout) {
|
||||
if (excluded_result_ids == nullptr || excluded_result_ids_size == 0 ||
|
||||
excluded_result_index >= excluded_result_ids_size) {
|
||||
return get_n_ids(n, result);
|
||||
return get_n_ids(n, result, override_timeout);
|
||||
}
|
||||
|
||||
// This method is only called in Index::search_wildcard after filter_result_iterator_t::compute_result.
|
||||
@ -1678,6 +1714,20 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n,
|
||||
return;
|
||||
}
|
||||
|
||||
if (override_timeout) {
|
||||
result_index = 0;
|
||||
} else if (timeout_info != nullptr) {
|
||||
// In Index::search_wildcard number of calls to get_n_ids will be min(number of threads, filter match ids).
|
||||
// Therefore, `timeout_info->function_call_counter` won't reach `function_call_modulo` if only incremented on
|
||||
// function call.
|
||||
if (n > function_call_modulo) {
|
||||
timeout_info->function_call_counter = function_call_modulo - 1;
|
||||
}
|
||||
if (is_timed_out()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<uint32_t> match_indexes;
|
||||
for (uint32_t count = 0; count < n && result_index < filter_result.count; result_index++) {
|
||||
auto id = filter_result.docs[result_index];
|
||||
@ -1708,7 +1758,9 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n,
|
||||
result_reference = std::move(filter_result.coll_to_references[match_index]);
|
||||
}
|
||||
|
||||
is_valid = result_index < filter_result.count;
|
||||
if (!override_timeout) {
|
||||
validity = result_index < filter_result.count ? valid : invalid;
|
||||
}
|
||||
}
|
||||
|
||||
filter_result_iterator_t::filter_result_iterator_t(uint32_t approx_filter_ids_length) :
|
||||
@ -1717,29 +1769,34 @@ filter_result_iterator_t::filter_result_iterator_t(uint32_t approx_filter_ids_le
|
||||
delete_filter_node = true;
|
||||
}
|
||||
|
||||
filter_result_iterator_t::filter_result_iterator_t(uint32_t* ids, const uint32_t& ids_count) {
|
||||
filter_result_iterator_t::filter_result_iterator_t(uint32_t* ids, const uint32_t& ids_count,
|
||||
uint64_t search_begin, uint64_t search_stop) {
|
||||
filter_result.count = approx_filter_ids_length = ids_count;
|
||||
filter_result.docs = ids;
|
||||
is_valid = ids_count > 0;
|
||||
validity = ids_count > 0 ? valid : invalid;
|
||||
|
||||
if (is_valid) {
|
||||
if (validity) {
|
||||
seq_id = filter_result.docs[result_index];
|
||||
is_filter_result_initialized = true;
|
||||
filter_node = new filter_node_t({"dummy", {}, {}});
|
||||
delete_filter_node = true;
|
||||
|
||||
if (search_stop != UINT64_MAX) {
|
||||
timeout_info = std::make_unique<filter_result_iterator_timeout_info>(search_begin, search_stop);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::add_phrase_ids(filter_result_iterator_t*& filter_result_iterator,
|
||||
void filter_result_iterator_t::add_phrase_ids(filter_result_iterator_t*& fit,
|
||||
uint32_t* phrase_result_ids, const uint32_t& phrase_result_count) {
|
||||
auto root_iterator = new filter_result_iterator_t(std::min(phrase_result_count, filter_result_iterator->approx_filter_ids_length));
|
||||
auto root_iterator = new filter_result_iterator_t(std::min(phrase_result_count, fit->approx_filter_ids_length));
|
||||
root_iterator->left_it = new filter_result_iterator_t(phrase_result_ids, phrase_result_count);
|
||||
root_iterator->right_it = filter_result_iterator;
|
||||
root_iterator->right_it = fit;
|
||||
|
||||
auto& left_it = root_iterator->left_it;
|
||||
auto& right_it = root_iterator->right_it;
|
||||
|
||||
while (left_it->is_valid && right_it->is_valid && left_it->seq_id != right_it->seq_id) {
|
||||
while (left_it->validity && right_it->validity && left_it->seq_id != right_it->seq_id) {
|
||||
if (left_it->seq_id < right_it->seq_id) {
|
||||
left_it->skip_to(right_it->seq_id);
|
||||
} else {
|
||||
@ -1747,19 +1804,25 @@ void filter_result_iterator_t::add_phrase_ids(filter_result_iterator_t*& filter_
|
||||
}
|
||||
}
|
||||
|
||||
root_iterator->is_valid = left_it->is_valid && right_it->is_valid;
|
||||
root_iterator->timeout_info = std::move(fit->timeout_info);
|
||||
root_iterator->validity = (left_it->validity == timed_out || right_it->validity == timed_out) ? timed_out :
|
||||
(left_it->validity == invalid || right_it->validity == invalid) ? invalid : valid;
|
||||
root_iterator->seq_id = left_it->seq_id;
|
||||
filter_result_iterator = root_iterator;
|
||||
fit = root_iterator;
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::compute_result() {
|
||||
if (filter_node == nullptr) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
is_filter_result_initialized = false;
|
||||
LOG(ERROR) << "filter_node is null";
|
||||
return;
|
||||
}
|
||||
|
||||
if (timeout_info != nullptr && is_timed_out()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (filter_node->isOperator) {
|
||||
left_it->compute_result();
|
||||
right_it->compute_result();
|
||||
@ -1773,7 +1836,7 @@ void filter_result_iterator_t::compute_result() {
|
||||
// In a complex filter query a sub-expression might not match any document while the full expression does match
|
||||
// at least one document. If the full expression doesn't match any document, we return early in the search.
|
||||
if (filter_result.count == 0) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
is_filter_result_initialized = true;
|
||||
return;
|
||||
}
|
||||
@ -1882,7 +1945,7 @@ void filter_result_iterator_t::compute_result() {
|
||||
}
|
||||
|
||||
if (filter_result.count == 0) {
|
||||
is_valid = false;
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1891,3 +1954,18 @@ void filter_result_iterator_t::compute_result() {
|
||||
is_filter_result_initialized = true;
|
||||
approx_filter_ids_length = filter_result.count;
|
||||
}
|
||||
|
||||
bool filter_result_iterator_t::is_timed_out() {
|
||||
if (validity == timed_out ||
|
||||
(++(timeout_info->function_call_counter) % function_call_modulo == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
std::chrono::system_clock::now().time_since_epoch()).count() - timeout_info->search_begin_us) > timeout_info->search_stop_us)) {
|
||||
validity = timed_out;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
filter_result_iterator_timeout_info::filter_result_iterator_timeout_info(uint64_t search_begin,
|
||||
uint64_t search_stop) :
|
||||
search_begin_us(search_begin),
|
||||
search_stop_us(search_stop) {}
|
||||
|
103
src/index.cpp
103
src/index.cpp
@ -1641,6 +1641,7 @@ Option<bool> Index::search_all_candidates(const size_t num_search_fields,
|
||||
|
||||
query_hashes.insert(qhash);
|
||||
filter_result_iterator->reset();
|
||||
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
|
||||
}
|
||||
|
||||
return Option<bool>(true);
|
||||
@ -1790,7 +1791,8 @@ Option<bool> Index::do_filtering_with_lock(filter_node_t* const filter_tree_root
|
||||
const std::string& collection_name) const {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
auto filter_result_iterator = filter_result_iterator_t(collection_name, this, filter_tree_root);
|
||||
auto filter_result_iterator = filter_result_iterator_t(collection_name, this, filter_tree_root,
|
||||
search_begin_us, search_stop_us);
|
||||
auto filter_init_op = filter_result_iterator.init_status();
|
||||
if (!filter_init_op.ok()) {
|
||||
return filter_init_op;
|
||||
@ -1807,7 +1809,8 @@ Option<bool> Index::do_reference_filtering_with_lock(filter_node_t* const filter
|
||||
const std::string& reference_helper_field_name) const {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
auto filter_result_iterator = filter_result_iterator_t(collection_name, this, filter_tree_root);
|
||||
auto filter_result_iterator = filter_result_iterator_t(collection_name, this, filter_tree_root,
|
||||
search_begin_us, search_stop_us);
|
||||
auto filter_init_op = filter_result_iterator.init_status();
|
||||
if (!filter_init_op.ok()) {
|
||||
return filter_init_op;
|
||||
@ -2406,7 +2409,8 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
) const {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
auto filter_result_iterator = new filter_result_iterator_t(collection_name, this, filter_tree_root);
|
||||
auto filter_result_iterator = new filter_result_iterator_t(collection_name, this, filter_tree_root,
|
||||
search_begin_us, search_stop_us);
|
||||
std::unique_ptr<filter_result_iterator_t> filter_iterator_guard(filter_result_iterator);
|
||||
|
||||
auto filter_init_op = filter_result_iterator->init_status();
|
||||
@ -2414,7 +2418,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
return filter_init_op;
|
||||
}
|
||||
|
||||
if (filter_tree_root != nullptr && !filter_result_iterator->is_valid) {
|
||||
if (filter_tree_root != nullptr && filter_result_iterator->validity != filter_result_iterator_t::valid) {
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
@ -2430,6 +2434,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
filter_result_iterator, curated_ids, included_ids_map,
|
||||
included_ids_vec, excluded_group_ids);
|
||||
filter_result_iterator->reset();
|
||||
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
|
||||
|
||||
std::vector<uint32_t> curated_ids_sorted(curated_ids.begin(), curated_ids.end());
|
||||
std::sort(curated_ids_sorted.begin(), curated_ids_sorted.end());
|
||||
@ -2462,7 +2467,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
// phrase queries are handled as a filtering query
|
||||
bool is_wildcard_non_phrase_query = is_wildcard_query && field_query_tokens[0].q_phrases.empty();
|
||||
|
||||
bool no_filters_provided = (filter_tree_root == nullptr && !filter_result_iterator->is_valid);
|
||||
bool no_filters_provided = (filter_tree_root == nullptr && !filter_result_iterator->validity == filter_result_iterator_t::valid);
|
||||
|
||||
// handle phrase searches
|
||||
if (!field_query_tokens[0].q_phrases.empty()) {
|
||||
@ -2555,7 +2560,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
|
||||
uint32_t filter_id_count = 0;
|
||||
while (!no_filters_provided &&
|
||||
filter_id_count < vector_query.flat_search_cutoff && filter_result_iterator->is_valid) {
|
||||
filter_id_count < vector_query.flat_search_cutoff && filter_result_iterator->validity == filter_result_iterator_t::valid) {
|
||||
auto& seq_id = filter_result_iterator->seq_id;
|
||||
auto filter_result = single_filter_result_t(seq_id, std::move(filter_result_iterator->reference));
|
||||
filter_result_iterator->next();
|
||||
@ -2583,9 +2588,10 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
filter_id_count++;
|
||||
}
|
||||
filter_result_iterator->reset();
|
||||
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
|
||||
|
||||
if(no_filters_provided ||
|
||||
(filter_id_count >= vector_query.flat_search_cutoff && filter_result_iterator->is_valid)) {
|
||||
(filter_id_count >= vector_query.flat_search_cutoff && filter_result_iterator->validity == filter_result_iterator_t::valid)) {
|
||||
dist_results.clear();
|
||||
|
||||
VectorFilterFunctor filterFunctor(filter_result_iterator);
|
||||
@ -2605,15 +2611,20 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
|
||||
filter_result_iterator->reset();
|
||||
|
||||
if (filter_result_iterator->is_valid && !filter_result_iterator->reference.empty()) {
|
||||
if (!filter_result_iterator->reference.empty()) {
|
||||
// We'll have to get the references of each document.
|
||||
for (auto pair: pairs) {
|
||||
if (filter_result_iterator->validity == filter_result_iterator_t::timed_out) {
|
||||
// Overriding timeout since we need to get the references of matched docs.
|
||||
filter_result_iterator->reset(true);
|
||||
search_cutoff = true;
|
||||
}
|
||||
|
||||
// The doc_id must be valid otherwise it would've been filtered out upstream.
|
||||
filter_result_iterator->skip_to(pair.second);
|
||||
filter_result_iterator->skip_to(pair.second, search_cutoff);
|
||||
auto filter_result = single_filter_result_t(pair.second,
|
||||
std::move(filter_result_iterator->reference));
|
||||
dist_results.emplace_back(pair.first, filter_result);
|
||||
filter_result_iterator->reset();
|
||||
}
|
||||
} else {
|
||||
for (const auto &pair: pairs) {
|
||||
@ -2688,7 +2699,8 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
} else {
|
||||
// if filters were not provided, use the seq_ids index to generate the list of all document ids
|
||||
if (no_filters_provided) {
|
||||
filter_result_iterator = new filter_result_iterator_t(seq_ids->uncompress(), seq_ids->num_ids());
|
||||
filter_result_iterator = new filter_result_iterator_t(seq_ids->uncompress(), seq_ids->num_ids(),
|
||||
search_begin_us, search_stop_us);
|
||||
filter_iterator_guard.reset(filter_result_iterator);
|
||||
}
|
||||
|
||||
@ -2703,8 +2715,6 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
if (!search_wildcard_op.ok()) {
|
||||
return search_wildcard_op;
|
||||
}
|
||||
|
||||
filter_result_iterator->reset();
|
||||
}
|
||||
|
||||
uint32_t _all_result_ids_len = all_result_ids_len;
|
||||
@ -2831,6 +2841,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
}
|
||||
|
||||
filter_result_iterator->reset();
|
||||
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
|
||||
|
||||
// gather up both original query and synonym queries and do drop tokens
|
||||
|
||||
@ -2937,6 +2948,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
}
|
||||
|
||||
filter_result_iterator->reset();
|
||||
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
|
||||
|
||||
if(!vector_query.field_name.empty()) {
|
||||
// check at least one of sort fields is text match
|
||||
@ -2969,6 +2981,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
dist_labels = field_vector_index->vecdex->searchKnnCloserFirst(vector_query.values.data(), k, &filterFunctor);
|
||||
}
|
||||
filter_result_iterator->reset();
|
||||
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
|
||||
|
||||
std::vector<std::pair<uint32_t,float>> vec_results;
|
||||
for (const auto& dist_label : dist_labels) {
|
||||
@ -3023,14 +3036,15 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
group_by_field_it_vec = get_group_by_field_iterators(group_by_fields);
|
||||
}
|
||||
|
||||
for(size_t res_index = 0; res_index < vec_results.size(); res_index++) {
|
||||
for(size_t res_index = 0; res_index < vec_results.size() &&
|
||||
filter_result_iterator->validity != filter_result_iterator_t::timed_out; res_index++) {
|
||||
auto& vec_result = vec_results[res_index];
|
||||
auto seq_id = vec_result.first;
|
||||
|
||||
filter_result_iterator->skip_to(seq_id);
|
||||
auto references = std::move(filter_result_iterator->reference);
|
||||
filter_result_iterator->reset();
|
||||
|
||||
|
||||
KV* found_kv = nullptr;
|
||||
if(group_limit != 0) {
|
||||
for(auto& kv : kvs) {
|
||||
@ -3106,14 +3120,6 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
kv.text_match_score = 0;
|
||||
kv.vector_distance = vec_result.second;
|
||||
|
||||
if (filter_result_iterator->is_valid &&
|
||||
!filter_result_iterator->reference.empty()) {
|
||||
// The doc_id must be valid otherwise it would've been filtered out upstream.
|
||||
filter_result_iterator->skip_to(seq_id);
|
||||
kv.reference_filter_results = std::move(filter_result_iterator->reference);
|
||||
filter_result_iterator->reset();
|
||||
}
|
||||
|
||||
auto ret = topster->add(&kv);
|
||||
vec_search_ids.push_back(seq_id);
|
||||
|
||||
@ -3122,6 +3128,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
}
|
||||
}
|
||||
}
|
||||
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
|
||||
|
||||
if(!vec_search_ids.empty()) {
|
||||
uint32_t* new_all_result_ids = nullptr;
|
||||
@ -3371,9 +3378,9 @@ void Index::process_curated_ids(const std::vector<std::pair<uint32_t, uint32_t>>
|
||||
// if `filter_curated_hits` is enabled, we will remove curated hits that don't match filter condition
|
||||
std::set<uint32_t> included_ids_set;
|
||||
|
||||
if(filter_result_iterator->is_valid && filter_curated_hits) {
|
||||
if(filter_result_iterator->validity == filter_result_iterator_t::valid && filter_curated_hits) {
|
||||
for (const auto &included_id: included_ids_vec) {
|
||||
auto result = filter_result_iterator->valid(included_id);
|
||||
auto result = filter_result_iterator->is_valid(included_id);
|
||||
|
||||
if (result == -1) {
|
||||
break;
|
||||
@ -3589,6 +3596,10 @@ Option<bool> Index::fuzzy_search_fields(const std::vector<search_field_t>& the_f
|
||||
costs[token_index], costs[token_index], max_candidates, token_order, prefix_search,
|
||||
last_token, prev_token, filter_result_iterator, field_leaves, unique_tokens);
|
||||
filter_result_iterator->reset();
|
||||
if (filter_result_iterator->validity == filter_result_iterator_t::timed_out) {
|
||||
search_cutoff = true;
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
/*auto timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
std::chrono::high_resolution_clock::now() - begin).count();
|
||||
@ -3620,6 +3631,7 @@ Option<bool> Index::fuzzy_search_fields(const std::vector<search_field_t>& the_f
|
||||
the_fields, num_search_fields, filter_result_iterator, exclude_token_ids,
|
||||
exclude_token_ids_size, prev_token_doc_ids, popular_field_ids);
|
||||
filter_result_iterator->reset();
|
||||
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
|
||||
|
||||
for(size_t field_id: query_field_ids) {
|
||||
auto& the_field = the_fields[field_id];
|
||||
@ -3643,6 +3655,10 @@ Option<bool> Index::fuzzy_search_fields(const std::vector<search_field_t>& the_f
|
||||
costs[token_index], costs[token_index], max_candidates, token_order, prefix_search,
|
||||
false, "", filter_result_iterator, field_leaves, unique_tokens);
|
||||
filter_result_iterator->reset();
|
||||
if (filter_result_iterator->validity == filter_result_iterator_t::timed_out) {
|
||||
search_cutoff = true;
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
if(field_leaves.empty()) {
|
||||
// look at the next field
|
||||
@ -4894,7 +4910,7 @@ Option<bool> Index::do_phrase_search(const size_t num_search_fields, const std::
|
||||
collate_included_ids({}, included_ids_map, curated_topster, searched_queries);
|
||||
|
||||
// AND phrase id matches with filter ids
|
||||
if(filter_result_iterator->is_valid) {
|
||||
if(filter_result_iterator->validity) {
|
||||
filter_result_iterator_t::add_phrase_ids(filter_result_iterator, phrase_result_ids, phrase_result_count);
|
||||
} else {
|
||||
delete filter_result_iterator;
|
||||
@ -4917,7 +4933,8 @@ Option<bool> Index::do_phrase_search(const size_t num_search_fields, const std::
|
||||
group_by_field_it_vec = get_group_by_field_iterators(group_by_fields);
|
||||
}
|
||||
// populate topster
|
||||
for(size_t i = 0; i < std::min<size_t>(10000, all_result_ids_len); i++) {
|
||||
for(size_t i = 0; i < std::min<size_t>(10000, all_result_ids_len) &&
|
||||
filter_result_iterator->validity == filter_result_iterator_t::valid; i++) {
|
||||
auto seq_id = filter_result_iterator->seq_id;
|
||||
auto references = std::move(filter_result_iterator->reference);
|
||||
filter_result_iterator->next();
|
||||
@ -4957,6 +4974,7 @@ Option<bool> Index::do_phrase_search(const size_t num_search_fields, const std::
|
||||
}
|
||||
}
|
||||
filter_result_iterator->reset();
|
||||
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
|
||||
|
||||
searched_queries.push_back({});
|
||||
return Option<bool>(true);
|
||||
@ -5068,7 +5086,7 @@ Option<bool> Index::do_infix_search(const size_t num_search_fields, const std::v
|
||||
raw_infix_ids_length = infix_ids.size();
|
||||
}
|
||||
|
||||
if(filter_result_iterator->is_valid) {
|
||||
if(filter_result_iterator->validity == filter_result_iterator_t::valid) {
|
||||
filter_result_t result;
|
||||
filter_result_iterator->and_scalar(raw_infix_ids, raw_infix_ids_length, result);
|
||||
if(raw_infix_ids != &infix_ids[0]) {
|
||||
@ -5440,11 +5458,15 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
|
||||
uint32_t excluded_result_index = 0;
|
||||
Option<bool>* compute_sort_score_statuses[num_threads];
|
||||
|
||||
for(size_t thread_id = 0; thread_id < num_threads && filter_result_iterator->is_valid; thread_id++) {
|
||||
for(size_t thread_id = 0; thread_id < num_threads &&
|
||||
filter_result_iterator->validity == filter_result_iterator_t::valid; thread_id++) {
|
||||
auto batch_result = new filter_result_t();
|
||||
filter_result_iterator->get_n_ids(window_size, excluded_result_index, exclude_token_ids,
|
||||
exclude_token_ids_size, batch_result);
|
||||
|
||||
if (batch_result->count == 0) {
|
||||
delete batch_result;
|
||||
break;
|
||||
}
|
||||
num_queued++;
|
||||
|
||||
searched_queries.push_back({});
|
||||
@ -5530,7 +5552,7 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
|
||||
std::unique_lock<std::mutex> lock_process(m_process);
|
||||
cv_process.wait(lock_process, [&](){ return num_processed == num_queued; });
|
||||
|
||||
search_cutoff = parent_search_cutoff;
|
||||
search_cutoff = parent_search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
|
||||
|
||||
for(size_t thread_id = 0; thread_id < num_processed; thread_id++) {
|
||||
if (compute_sort_score_statuses[thread_id] != nullptr) {
|
||||
@ -5558,8 +5580,20 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
|
||||
std::chrono::high_resolution_clock::now() - beginF).count();
|
||||
LOG(INFO) << "Time for raw scoring: " << timeMillisF;*/
|
||||
|
||||
filter_result_iterator->reset();
|
||||
all_result_ids_len = filter_result_iterator->to_filter_id_array(all_result_ids);
|
||||
if (filter_result_iterator->validity == filter_result_iterator_t::timed_out) {
|
||||
auto partial_result = new filter_result_t();
|
||||
std::unique_ptr<filter_result_t> partial_result_guard(partial_result);
|
||||
|
||||
filter_result_iterator->get_n_ids(window_size * num_processed,
|
||||
excluded_result_index, nullptr, 0, partial_result, true);
|
||||
all_result_ids_len = partial_result->count;
|
||||
all_result_ids = partial_result->docs;
|
||||
partial_result->docs = nullptr;
|
||||
} else {
|
||||
filter_result_iterator->reset();
|
||||
all_result_ids_len = filter_result_iterator->to_filter_id_array(all_result_ids);
|
||||
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
|
||||
}
|
||||
|
||||
return Option<bool>(true);
|
||||
}
|
||||
@ -5607,7 +5641,8 @@ void Index::populate_sort_mapping(int* sort_order, std::vector<size_t>& geopoint
|
||||
auto& eval_exp = sort_fields_std[i].eval;
|
||||
auto count = sort_fields_std[i].eval_expressions.size();
|
||||
for (uint32_t j = 0; j < count; j++) {
|
||||
auto filter_result_iterator = filter_result_iterator_t("", this, &eval_exp.filter_trees[j]);
|
||||
auto filter_result_iterator = filter_result_iterator_t("", this, &eval_exp.filter_trees[j],
|
||||
search_begin_us, search_stop_us);
|
||||
auto filter_init_op = filter_result_iterator.init_status();
|
||||
if (!filter_init_op.ok()) {
|
||||
return;
|
||||
|
@ -209,7 +209,7 @@ bool or_iterator_t::take_id(result_iter_state_t& istate, uint32_t id, bool& is_e
|
||||
}
|
||||
|
||||
if (istate.fit != nullptr && istate.fit->approx_filter_ids_length > 0) {
|
||||
if (istate.fit->valid(id) == 1) {
|
||||
if (istate.fit->is_valid(id) == 1) {
|
||||
istate.fit->next();
|
||||
return true;
|
||||
}
|
||||
@ -261,7 +261,7 @@ bool or_iterator_t::take_id(result_iter_state_t& istate, uint32_t id, bool& is_e
|
||||
}
|
||||
|
||||
if (istate.fit != nullptr && istate.fit->approx_filter_ids_length > 0) {
|
||||
if (istate.fit->valid(id) == 1) {
|
||||
if (istate.fit->is_valid(id) == 1) {
|
||||
filter_result.seq_id = id;
|
||||
filter_result.reference_filter_results = std::move(istate.fit->reference);
|
||||
istate.fit->next();
|
||||
|
@ -1879,7 +1879,7 @@ bool result_iter_state_t::is_filter_valid() const {
|
||||
}
|
||||
|
||||
if (fit != nullptr) {
|
||||
return fit->is_valid;
|
||||
return fit->validity == filter_result_iterator_t::valid;
|
||||
}
|
||||
|
||||
return false;
|
||||
@ -1890,7 +1890,7 @@ uint32_t result_iter_state_t::get_filter_id() const {
|
||||
return filter_ids[filter_ids_index];
|
||||
}
|
||||
|
||||
if (fit != nullptr && fit->is_valid) {
|
||||
if (fit != nullptr && fit->validity == filter_result_iterator_t::valid) {
|
||||
return fit->seq_id;
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <collection_manager.h>
|
||||
#include <filter.h>
|
||||
#include <posting.h>
|
||||
#include <chrono>
|
||||
#include "collection.h"
|
||||
|
||||
class FilterTest : public ::testing::Test {
|
||||
@ -65,7 +66,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
auto iter_null_filter_tree_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
|
||||
ASSERT_TRUE(iter_null_filter_tree_test.init_status().ok());
|
||||
ASSERT_FALSE(iter_null_filter_tree_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_null_filter_tree_test.validity);
|
||||
|
||||
Option<bool> filter_op = filter::parse_filter_query("name: foo", coll->get_schema(), store, doc_id_prefix,
|
||||
filter_tree_root);
|
||||
@ -74,7 +75,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
auto iter_no_match_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
|
||||
ASSERT_TRUE(iter_no_match_test.init_status().ok());
|
||||
ASSERT_FALSE(iter_no_match_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_no_match_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -85,7 +86,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
auto iter_no_match_multi_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
|
||||
ASSERT_TRUE(iter_no_match_multi_test.init_status().ok());
|
||||
ASSERT_FALSE(iter_no_match_multi_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_no_match_multi_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -97,11 +98,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
ASSERT_TRUE(iter_contains_test.init_status().ok());
|
||||
|
||||
for (uint32_t i = 0; i < 5; i++) {
|
||||
ASSERT_TRUE(iter_contains_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_contains_test.validity);
|
||||
ASSERT_EQ(i, iter_contains_test.seq_id);
|
||||
iter_contains_test.next();
|
||||
}
|
||||
ASSERT_FALSE(iter_contains_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_contains_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -113,11 +114,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
ASSERT_TRUE(iter_contains_multi_test.init_status().ok());
|
||||
|
||||
for (uint32_t i = 0; i < 5; i++) {
|
||||
ASSERT_TRUE(iter_contains_multi_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_contains_multi_test.validity);
|
||||
ASSERT_EQ(i, iter_contains_multi_test.seq_id);
|
||||
iter_contains_multi_test.next();
|
||||
}
|
||||
ASSERT_FALSE(iter_contains_multi_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_contains_multi_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -129,11 +130,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
ASSERT_TRUE(iter_exact_match_1_test.init_status().ok());
|
||||
|
||||
for (uint32_t i = 0; i < 5; i++) {
|
||||
ASSERT_TRUE(iter_exact_match_1_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_exact_match_1_test.validity);
|
||||
ASSERT_EQ(i, iter_exact_match_1_test.seq_id);
|
||||
iter_exact_match_1_test.next();
|
||||
}
|
||||
ASSERT_FALSE(iter_exact_match_1_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_exact_match_1_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -143,7 +144,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
|
||||
auto iter_exact_match_2_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_exact_match_2_test.init_status().ok());
|
||||
ASSERT_FALSE(iter_exact_match_2_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_exact_match_2_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -156,11 +157,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
|
||||
std::vector<int> expected = {0, 2, 3, 4};
|
||||
for (auto const& i : expected) {
|
||||
ASSERT_TRUE(iter_exact_match_multi_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_exact_match_multi_test.validity);
|
||||
ASSERT_EQ(i, iter_exact_match_multi_test.seq_id);
|
||||
iter_exact_match_multi_test.next();
|
||||
}
|
||||
ASSERT_FALSE(iter_exact_match_multi_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_exact_match_multi_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -173,12 +174,12 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
|
||||
expected = {1, 3};
|
||||
for (auto const& i : expected) {
|
||||
ASSERT_TRUE(iter_not_equals_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_not_equals_test.validity);
|
||||
ASSERT_EQ(i, iter_not_equals_test.seq_id);
|
||||
iter_not_equals_test.next();
|
||||
}
|
||||
|
||||
ASSERT_FALSE(iter_not_equals_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_not_equals_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -189,13 +190,13 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
auto iter_skip_test1 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_skip_test1.init_status().ok());
|
||||
|
||||
ASSERT_TRUE(iter_skip_test1.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test1.validity);
|
||||
iter_skip_test1.skip_to(3);
|
||||
ASSERT_TRUE(iter_skip_test1.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test1.validity);
|
||||
ASSERT_EQ(4, iter_skip_test1.seq_id);
|
||||
iter_skip_test1.next();
|
||||
|
||||
ASSERT_FALSE(iter_skip_test1.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_skip_test1.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -206,9 +207,9 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
auto iter_skip_test2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_skip_test2.init_status().ok());
|
||||
|
||||
ASSERT_TRUE(iter_skip_test2.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test2.validity);
|
||||
iter_skip_test2.skip_to(3);
|
||||
ASSERT_FALSE(iter_skip_test2.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_skip_test2.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -219,11 +220,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
auto iter_and_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_and_test.init_status().ok());
|
||||
|
||||
ASSERT_TRUE(iter_and_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_and_test.validity);
|
||||
ASSERT_EQ(1, iter_and_test.seq_id);
|
||||
iter_and_test.next();
|
||||
|
||||
ASSERT_FALSE(iter_and_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_and_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -247,12 +248,12 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
|
||||
expected = {2, 4, 5};
|
||||
for (auto const& i : expected) {
|
||||
ASSERT_TRUE(iter_or_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_or_test.validity);
|
||||
ASSERT_EQ(i, iter_or_test.seq_id);
|
||||
iter_or_test.next();
|
||||
}
|
||||
|
||||
ASSERT_FALSE(iter_or_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_or_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -263,17 +264,17 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
auto iter_skip_complex_filter_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_skip_complex_filter_test.init_status().ok());
|
||||
|
||||
ASSERT_TRUE(iter_skip_complex_filter_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_complex_filter_test.validity);
|
||||
iter_skip_complex_filter_test.skip_to(4);
|
||||
|
||||
expected = {4, 5};
|
||||
for (auto const& i : expected) {
|
||||
ASSERT_TRUE(iter_skip_complex_filter_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_complex_filter_test.validity);
|
||||
ASSERT_EQ(i, iter_skip_complex_filter_test.seq_id);
|
||||
iter_skip_complex_filter_test.next();
|
||||
}
|
||||
|
||||
ASSERT_FALSE(iter_skip_complex_filter_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_skip_complex_filter_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -287,7 +288,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
std::vector<int> validate_ids = {0, 1, 2, 3, 4, 5, 6}, seq_ids = {0, 2, 2, 4, 4, 5, 5};
|
||||
expected = {1, 0, 1, 0, 1, 1, -1};
|
||||
for (uint32_t i = 0; i < validate_ids.size(); i++) {
|
||||
ASSERT_EQ(expected[i], iter_validate_ids_test1.valid(validate_ids[i]));
|
||||
ASSERT_EQ(expected[i], iter_validate_ids_test1.is_valid(validate_ids[i]));
|
||||
ASSERT_EQ(seq_ids[i], iter_validate_ids_test1.seq_id);
|
||||
}
|
||||
|
||||
@ -303,7 +304,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
validate_ids = {0, 1, 2, 3, 4, 5, 6}, seq_ids = {1, 1, 5, 5, 5, 5, 5};
|
||||
expected = {0, 1, 0, 0, 0, 1, -1};
|
||||
for (uint32_t i = 0; i < validate_ids.size(); i++) {
|
||||
ASSERT_EQ(expected[i], iter_validate_ids_test2.valid(validate_ids[i]));
|
||||
ASSERT_EQ(expected[i], iter_validate_ids_test2.is_valid(validate_ids[i]));
|
||||
ASSERT_EQ(seq_ids[i], iter_validate_ids_test2.seq_id);
|
||||
}
|
||||
|
||||
@ -319,7 +320,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
validate_ids = {0, 1, 2, 3, 4, 5, 6}, seq_ids = {0, 3, 3, 4, 4, 4, 4};
|
||||
expected = {1, 0, 0, 0, 1, -1, -1};
|
||||
for (uint32_t i = 0; i < validate_ids.size(); i++) {
|
||||
ASSERT_EQ(expected[i], iter_validate_ids_test3.valid(validate_ids[i]));
|
||||
ASSERT_EQ(expected[i], iter_validate_ids_test3.is_valid(validate_ids[i]));
|
||||
ASSERT_EQ(seq_ids[i], iter_validate_ids_test3.seq_id);
|
||||
}
|
||||
|
||||
@ -337,7 +338,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
seq_ids = {1, 1, 3, 3, 5, 5, 5};
|
||||
expected = {0, 1, 0, 1, 0, 1, -1};
|
||||
for (uint32_t i = 0; i < validate_ids.size(); i++) {
|
||||
ASSERT_EQ(expected[i], iter_validate_ids_not_equals_filter_test.valid(validate_ids[i]));
|
||||
ASSERT_EQ(expected[i], iter_validate_ids_not_equals_filter_test.is_valid(validate_ids[i]));
|
||||
ASSERT_EQ(seq_ids[i], iter_validate_ids_not_equals_filter_test.seq_id);
|
||||
}
|
||||
|
||||
@ -406,20 +407,20 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
|
||||
expected = {0, 2, 3, 4};
|
||||
for (auto const& i : expected) {
|
||||
ASSERT_TRUE(iter_reset_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_reset_test.validity);
|
||||
ASSERT_EQ(i, iter_reset_test.seq_id);
|
||||
iter_reset_test.next();
|
||||
}
|
||||
ASSERT_FALSE(iter_reset_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_reset_test.validity);
|
||||
|
||||
iter_reset_test.reset();
|
||||
|
||||
for (auto const& i : expected) {
|
||||
ASSERT_TRUE(iter_reset_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_reset_test.validity);
|
||||
ASSERT_EQ(i, iter_reset_test.seq_id);
|
||||
iter_reset_test.next();
|
||||
}
|
||||
ASSERT_FALSE(iter_reset_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_reset_test.validity);
|
||||
|
||||
auto iter_move_assignment_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
|
||||
@ -428,11 +429,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
|
||||
expected = {0, 2, 3, 4};
|
||||
for (auto const& i : expected) {
|
||||
ASSERT_TRUE(iter_move_assignment_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_move_assignment_test.validity);
|
||||
ASSERT_EQ(i, iter_move_assignment_test.seq_id);
|
||||
iter_move_assignment_test.next();
|
||||
}
|
||||
ASSERT_FALSE(iter_move_assignment_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_move_assignment_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -453,7 +454,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
for (uint32_t i = 0; i < filter_ids_length; i++) {
|
||||
ASSERT_EQ(expected[i], filter_ids[i]);
|
||||
}
|
||||
ASSERT_FALSE(iter_to_array_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_to_array_test.validity);
|
||||
|
||||
delete[] filter_ids;
|
||||
|
||||
@ -470,7 +471,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
for (uint32_t i = 0; i < and_result_length; i++) {
|
||||
ASSERT_EQ(expected[i], and_result[i]);
|
||||
}
|
||||
ASSERT_FALSE(iter_and_scalar_test.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_and_scalar_test.validity);
|
||||
|
||||
delete[] and_result;
|
||||
delete filter_tree_root;
|
||||
@ -493,11 +494,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
auto iter_skip_test3 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_skip_test3.init_status().ok());
|
||||
|
||||
ASSERT_TRUE(iter_skip_test3.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test3.validity);
|
||||
iter_skip_test3.skip_to(4);
|
||||
ASSERT_EQ(4, iter_skip_test3.seq_id);
|
||||
|
||||
ASSERT_TRUE(iter_skip_test3.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test3.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
|
||||
@ -509,10 +510,10 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
auto iter_skip_test4 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_skip_test4.init_status().ok());
|
||||
|
||||
ASSERT_TRUE(iter_skip_test4.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test4.validity);
|
||||
iter_skip_test4.skip_to(6);
|
||||
ASSERT_EQ(6, iter_skip_test4.seq_id);
|
||||
ASSERT_TRUE(iter_skip_test4.is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test4.validity);
|
||||
|
||||
auto iter_add_phrase_ids_test = new filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
std::unique_ptr<filter_result_iterator_t> filter_iter_guard(iter_add_phrase_ids_test);
|
||||
@ -526,8 +527,56 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
filter_iter_guard.release();
|
||||
filter_iter_guard.reset(iter_add_phrase_ids_test);
|
||||
|
||||
ASSERT_TRUE(iter_add_phrase_ids_test->is_valid);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_add_phrase_ids_test->validity);
|
||||
ASSERT_EQ(6, iter_add_phrase_ids_test->seq_id);
|
||||
|
||||
delete filter_tree_root;
|
||||
}
|
||||
}
|
||||
|
||||
/// Exercises the search cutoff of `filter_result_iterator_t`:
///   1. before the deadline the iterator can be walked to its end and reset,
///   2. once the deadline has passed the iterator reports `timed_out`,
///   3. `reset()` does not clear `timed_out`,
///   4. `get_n_ids()` yields nothing after a timeout unless `override_timeout` is true.
///
/// NOTE: this test sleeps for ~10 seconds in total because it relies on wall-clock time.
TEST_F(FilterTest, FilterTreeIteratorTimeout) {
    // Unsigned to match the `uint32_t` count parameter of `get_n_ids` and to avoid a
    // signed/unsigned comparison in the final assertion.
    const uint32_t count = 20;

    // NOTE(review): presumably the iterator takes ownership of `filter_ids` (the test never frees
    // it explicitly) -- confirm against the constructor.
    auto filter_ids = new uint32_t[count];
    for (uint32_t i = 0; i < count; i++) {
        filter_ids[i] = i;
    }

    const auto search_begin_us = std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::system_clock::now().time_since_epoch()).count();
    auto filter_iterator = new filter_result_iterator_t(filter_ids, count, search_begin_us,
                                                        10000000); // Timeout after 10 seconds
    std::unique_ptr<filter_result_iterator_t> filter_iter_guard(filter_iterator);

    ASSERT_EQ(filter_result_iterator_t::valid, filter_iterator->validity);
    std::this_thread::sleep_for(std::chrono::seconds(5));

    // ~5 seconds elapsed: still within the 10 second budget, so every id can be visited.
    for (uint32_t i = 0; i < count; i++) {
        ASSERT_EQ(filter_result_iterator_t::valid, filter_iterator->validity);
        filter_iterator->next();
    }
    ASSERT_EQ(filter_result_iterator_t::invalid, filter_iterator->validity); // End of iterator reached.

    filter_iterator->reset();
    ASSERT_EQ(filter_result_iterator_t::valid, filter_iterator->validity);
    std::this_thread::sleep_for(std::chrono::seconds(5));

    // ~10 seconds elapsed: the deadline has passed.
    // NOTE(review): the timeout only becomes visible after the 9th `next()`; presumably the clock
    // is consulted once every `function_call_modulo` calls (10 under TEST_BUILD) -- confirm
    // against `is_timed_out()`.
    for (uint32_t i = 0; i < 9; i++) {
        ASSERT_EQ(filter_result_iterator_t::valid, filter_iterator->validity);
        filter_iterator->next();
    }
    ASSERT_EQ(filter_result_iterator_t::timed_out, filter_iterator->validity);

    filter_iterator->reset();
    ASSERT_EQ(filter_result_iterator_t::timed_out, filter_iterator->validity); // Resetting won't help with timeout.

    uint32_t excluded_result_index = 0;
    auto result = new filter_result_t();
    filter_iterator->get_n_ids(count, excluded_result_index, nullptr, 0, result);
    // A failing ASSERT_* returns from the test body, so hand `result` to a guard instead of relying
    // on a trailing `delete`. The guard is attached after the call so that it owns whatever
    // `result` points to once `get_n_ids` (which takes the pointer by reference) has returned.
    std::unique_ptr<filter_result_t> result_guard(result);

    ASSERT_EQ(0, result->count); // Shouldn't return results

    result_guard.reset(); // Free the first result before allocating the second one.
    result = new filter_result_t();
    filter_iterator->get_n_ids(count, excluded_result_index, nullptr, 0, result, true);
    result_guard.reset(result);

    ASSERT_EQ(count, result->count); // With `override_timeout` true, we should get result.
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user