mirror of
https://github.com/typesense/typesense.git
synced 2025-05-20 13:42:26 +08:00
Improve lazy not iterator perf (#1669)
* Refactor `is_valid(id)` logic.
* Fix `CollectionFilteringTest` and `CollectionJoinTest`.
* Refactor not equals iterator logic.
* Fix `CollectionJoinTest.SortByReference`.
* Fix `filter_result_iterator_t::contains_atleast_one`.
* Refactor `filter_result_iterator_t::and_filter_iterators`.
* Refactor `filter_result_iterator_t::contains_atleast_one`.
* Update `approx_filter_ids_length` in not equals.
This commit is contained in:
parent
4a4e50b3c7
commit
fca277cbf2
@ -254,11 +254,10 @@ private:
|
||||
std::vector<std::vector<posting_list_t::iterator_t>> posting_list_iterators;
|
||||
std::vector<posting_list_t*> expanded_plists;
|
||||
|
||||
/// Used in case of a not equals string filter.
|
||||
/// The iterative logic to find not equals match is to return the ids that occur in between the equals match. This
|
||||
/// might lead to returning some ids that are deleted. So we use this iterator to check and return only the ids that
|
||||
/// exist in `index->seq_ids`.
|
||||
id_list_t::iterator_t all_seq_ids_iter = id_list_t::iterator_t(nullptr, nullptr, nullptr, false);
|
||||
bool is_not_equals_iterator = false;
|
||||
uint32_t equals_iterator_id = 0;
|
||||
bool is_equals_iterator_valid = true;
|
||||
uint32_t last_valid_id = 0;
|
||||
|
||||
/// Used in case of a single boolean filter matching more than `bool_filter_ids_threshold` ids.
|
||||
num_tree_t::iterator_t bool_iterator = num_tree_t::iterator_t(nullptr, NUM_COMPARATOR::EQUALS, 0);
|
||||
@ -279,10 +278,6 @@ private:
|
||||
/// Advances all the token iterators that are at seq_id and finds the next intersection.
|
||||
void advance_string_filter_token_iterators();
|
||||
|
||||
/// Finds the first match for a filter on string field. Only used in `init()` and `reset()`. Handles `!` in string
|
||||
/// filter.
|
||||
void get_string_filter_first_match(const bool& field_is_array);
|
||||
|
||||
/// Finds the next match for a filter on string field.
|
||||
void get_string_filter_next_match(const bool& field_is_array);
|
||||
|
||||
@ -295,6 +290,10 @@ private:
|
||||
/// Updates `validity` of the iterator to `timed_out` if condition is met. Assumes `timeout_info` is not null.
|
||||
inline bool is_timed_out();
|
||||
|
||||
/// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during
|
||||
/// this operation.
|
||||
void skip_to(uint32_t id);
|
||||
|
||||
public:
|
||||
uint32_t seq_id = 0;
|
||||
/// Collection name -> references
|
||||
@ -330,16 +329,18 @@ public:
|
||||
/// Recursively computes the result of each node and stores the final result in the root node.
|
||||
void compute_iterators();
|
||||
|
||||
/// Returns a tri-state:
|
||||
/// 0: id is not valid
|
||||
/// 1: id is valid
|
||||
/// -1: end of iterator / timed out
|
||||
/// Handles moving the individual iterators to id internally and checks if `id` matches the filter.
|
||||
///
|
||||
/// Handles moving the individual iterators internally.
|
||||
[[nodiscard]] int is_valid(uint32_t id);
|
||||
/// \return
|
||||
/// 0 : id is not valid
|
||||
/// 1 : id is valid
|
||||
/// -1: end of iterator / timed out
|
||||
[[nodiscard]] int is_valid(uint32_t id, const bool& override_timeout = false);
|
||||
|
||||
/// Advances the iterator to get the next value of doc and reference. The iterator may become invalid during this
|
||||
/// operation.
|
||||
///
|
||||
/// Should only be called after calling `compute_iterators()` or in conjunction with `is_valid(id)` when it returns `1`.
|
||||
void next();
|
||||
|
||||
/// Collects n doc ids while advancing the iterator. The ids present in excluded_result_ids are ignored. The
|
||||
@ -349,17 +350,16 @@ public:
|
||||
uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size,
|
||||
filter_result_t*& result, const bool& override_timeout = false);
|
||||
|
||||
/// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during
|
||||
/// this operation.
|
||||
void skip_to(uint32_t id, const bool& override_timeout = false);
|
||||
|
||||
/// Returns true if at least one id from the posting list object matches the filter.
|
||||
bool contains_atleast_one(const void* obj);
|
||||
|
||||
/// Returns to the initial state of the iterator.
|
||||
void reset(const bool& override_timeout = false);
|
||||
|
||||
/// Iterates and collects all the filter ids into filter_array.
|
||||
/// Copies filter ids from `filter_result` into `filter_array`.
|
||||
///
|
||||
/// Should only be called after calling `compute_iterators()`.
|
||||
///
|
||||
/// \return size of the filter array
|
||||
uint32_t to_filter_id_array(uint32_t*& filter_array);
|
||||
|
||||
@ -383,4 +383,12 @@ public:
|
||||
[[nodiscard]] filter_result_iterator_t* _get_right_it() const {
|
||||
return right_it;
|
||||
}
|
||||
|
||||
[[nodiscard]] uint32_t _get_equals_iterator_id() const {
|
||||
return equals_iterator_id;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool _get_is_equals_iterator_valid() const {
|
||||
return is_equals_iterator_valid;
|
||||
}
|
||||
};
|
||||
|
@ -203,20 +203,52 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
|
||||
|
||||
void filter_result_iterator_t::and_filter_iterators() {
|
||||
while (left_it->validity && right_it->validity) {
|
||||
while (left_it->seq_id < right_it->seq_id) {
|
||||
left_it->skip_to(right_it->seq_id);
|
||||
if (!left_it->validity) {
|
||||
if (left_it->seq_id < right_it->seq_id) {
|
||||
auto const& left_validity = left_it->is_valid(right_it->seq_id);
|
||||
|
||||
if (left_validity == 1) {
|
||||
seq_id = right_it->seq_id;
|
||||
|
||||
reference.clear();
|
||||
for (const auto& item: left_it->reference) {
|
||||
reference[item.first] = item.second;
|
||||
}
|
||||
for (const auto& item: right_it->reference) {
|
||||
reference[item.first] = item.second;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (left_validity == -1) {
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
while (left_it->seq_id > right_it->seq_id) {
|
||||
right_it->skip_to(left_it->seq_id);
|
||||
if (!right_it->validity) {
|
||||
if (left_it->seq_id > right_it->seq_id) {
|
||||
auto const& right_validity = right_it->is_valid(left_it->seq_id);
|
||||
|
||||
if (right_validity == 1) {
|
||||
seq_id = left_it->seq_id;
|
||||
|
||||
reference.clear();
|
||||
for (const auto& item: left_it->reference) {
|
||||
reference[item.first] = item.second;
|
||||
}
|
||||
for (const auto& item: right_it->reference) {
|
||||
reference[item.first] = item.second;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (right_validity == -1) {
|
||||
validity = invalid;
|
||||
return;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (left_it->seq_id == right_it->seq_id) {
|
||||
@ -414,10 +446,11 @@ void filter_result_iterator_t::get_string_filter_next_match(const bool& field_is
|
||||
}
|
||||
|
||||
if (one_is_valid) {
|
||||
seq_id = lowest_id;
|
||||
equals_iterator_id = seq_id = lowest_id;
|
||||
}
|
||||
|
||||
validity = one_is_valid ? valid : invalid;
|
||||
is_equals_iterator_valid = one_is_valid;
|
||||
validity = one_is_valid || is_not_equals_iterator ? valid : invalid;
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::next() {
|
||||
@ -458,7 +491,7 @@ void filter_result_iterator_t::next() {
|
||||
right_it->next();
|
||||
} else if (left_it->seq_id == seq_id) {
|
||||
left_it->next();
|
||||
} else {
|
||||
} else if (right_it->seq_id == seq_id) {
|
||||
right_it->next();
|
||||
}
|
||||
|
||||
@ -477,6 +510,10 @@ void filter_result_iterator_t::next() {
|
||||
|
||||
field f = index->search_schema.at(a_filter.field_name);
|
||||
|
||||
if (is_not_equals_iterator) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (f.is_bool()) {
|
||||
bool_iterator.next();
|
||||
if (!bool_iterator.is_valid) {
|
||||
@ -487,39 +524,8 @@ void filter_result_iterator_t::next() {
|
||||
seq_id = bool_iterator.seq_id;
|
||||
return;
|
||||
} else if (f.is_string()) {
|
||||
if (filter_node->filter_exp.apply_not_equals) {
|
||||
do {
|
||||
if (++seq_id >= result_index) {
|
||||
uint32_t previous_match;
|
||||
do {
|
||||
previous_match = seq_id;
|
||||
advance_string_filter_token_iterators();
|
||||
get_string_filter_next_match(f.is_array());
|
||||
} while (validity && previous_match + 1 == seq_id);
|
||||
|
||||
if (!validity) {
|
||||
// We've reached the end of the index, no possible matches pending.
|
||||
if (previous_match >= index->seq_ids->last_id()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// (previous_match, last_doc_id] are a match for not equals.
|
||||
validity = valid;
|
||||
result_index = index->seq_ids->last_id() + 1;
|
||||
seq_id = previous_match + 1;
|
||||
} else {
|
||||
result_index = seq_id;
|
||||
seq_id = previous_match + 1;
|
||||
}
|
||||
}
|
||||
all_seq_ids_iter.skip_to(seq_id);
|
||||
} while (all_seq_ids_iter.valid() && all_seq_ids_iter.id() != seq_id); // Deleted id should not be considered a match.
|
||||
return;
|
||||
}
|
||||
|
||||
advance_string_filter_token_iterators();
|
||||
get_string_filter_next_match(f.is_array());
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -559,71 +565,6 @@ void apply_not_equals(uint32_t*&& all_ids,
|
||||
result_ids_len = to_include_ids_len;
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::get_string_filter_first_match(const bool& field_is_array) {
|
||||
get_string_filter_next_match(field_is_array);
|
||||
|
||||
if (filter_node->filter_exp.apply_not_equals && index->seq_ids->num_ids() > 0) {
|
||||
// filter didn't match any id. So by applying not equals, every id in the index is a match.
|
||||
if (!validity) {
|
||||
validity = valid;
|
||||
seq_id = 0;
|
||||
result_index = index->seq_ids->last_id() + 1;
|
||||
|
||||
all_seq_ids_iter.skip_to(seq_id);
|
||||
if (all_seq_ids_iter.valid() && all_seq_ids_iter.id() != seq_id) { // Deleted id should not be considered a match.
|
||||
next();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// [0, seq_id) are a match for not equals.
|
||||
if (seq_id > 0) {
|
||||
result_index = seq_id;
|
||||
seq_id = 0;
|
||||
|
||||
all_seq_ids_iter.skip_to(seq_id);
|
||||
if (all_seq_ids_iter.valid() && all_seq_ids_iter.id() != seq_id) { // Deleted id should not be considered a match.
|
||||
next();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Keep ignoring the consecutive matches.
|
||||
uint32_t previous_match;
|
||||
do {
|
||||
previous_match = seq_id;
|
||||
advance_string_filter_token_iterators();
|
||||
get_string_filter_next_match(field_is_array);
|
||||
} while (validity && previous_match + 1 == seq_id);
|
||||
|
||||
if (!validity) {
|
||||
// filter matched all the ids in the index. So for not equals, there's no match.
|
||||
if (previous_match >= index->seq_ids->last_id()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// (previous_match, last_doc_id] are a match for not equals.
|
||||
validity = valid;
|
||||
result_index = index->seq_ids->last_id() + 1;
|
||||
seq_id = previous_match + 1;
|
||||
|
||||
all_seq_ids_iter.skip_to(seq_id);
|
||||
if (all_seq_ids_iter.valid() && all_seq_ids_iter.id() != seq_id) { // Deleted id should not be considered a match.
|
||||
next();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
result_index = seq_id;
|
||||
seq_id = previous_match + 1;
|
||||
|
||||
all_seq_ids_iter.skip_to(seq_id);
|
||||
if (all_seq_ids_iter.valid() && all_seq_ids_iter.id() != seq_id) { // Deleted id should not be considered a match.
|
||||
next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::init() {
|
||||
if (filter_node == nullptr) {
|
||||
return;
|
||||
@ -1290,29 +1231,33 @@ void filter_result_iterator_t::init() {
|
||||
return;
|
||||
}
|
||||
|
||||
if (a_filter.apply_not_equals &&
|
||||
index->seq_ids->num_ids() - approx_filter_ids_length < string_filter_ids_threshold) {
|
||||
// Since there are very few matches, and we have to apply not equals, iteration will be inefficient.
|
||||
compute_iterators();
|
||||
return;
|
||||
} else if (a_filter.apply_not_equals) {
|
||||
all_seq_ids_iter = index->seq_ids->new_iterator();
|
||||
if (a_filter.apply_not_equals) {
|
||||
auto const& num_ids = index->seq_ids->num_ids();
|
||||
approx_filter_ids_length = approx_filter_ids_length >= num_ids ? num_ids : (num_ids - approx_filter_ids_length);
|
||||
|
||||
if (approx_filter_ids_length < string_filter_ids_threshold) {
|
||||
// Since there are very few matches, and we have to apply not equals, iteration will be inefficient.
|
||||
compute_iterators();
|
||||
return;
|
||||
} else {
|
||||
is_not_equals_iterator = true;
|
||||
}
|
||||
} else if (approx_filter_ids_length < string_filter_ids_threshold) {
|
||||
compute_iterators();
|
||||
return;
|
||||
}
|
||||
|
||||
get_string_filter_first_match(f.is_array());
|
||||
get_string_filter_next_match(f.is_array());
|
||||
if (is_not_equals_iterator) {
|
||||
seq_id = 0;
|
||||
last_valid_id = index->seq_ids->last_id();
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::skip_to(uint32_t id, const bool& override_timeout) {
|
||||
if (validity == invalid || (!override_timeout && timeout_info != nullptr && is_timed_out())) {
|
||||
return;
|
||||
}
|
||||
|
||||
// No need to traverse iterator tree if there's only one filter or compute_iterators() has been called.
|
||||
void filter_result_iterator_t::skip_to(uint32_t id) {
|
||||
if (is_filter_result_initialized) {
|
||||
ArrayUtils::skip_index_to_id(result_index, filter_result.docs, filter_result.count, id);
|
||||
|
||||
@ -1331,20 +1276,6 @@ void filter_result_iterator_t::skip_to(uint32_t id, const bool& override_timeout
|
||||
return;
|
||||
}
|
||||
|
||||
if (filter_node->isOperator) {
|
||||
// Skip the subtrees to id and then apply operators to arrive at the next valid doc.
|
||||
left_it->skip_to(id);
|
||||
right_it->skip_to(id);
|
||||
|
||||
if (filter_node->filter_operator == AND) {
|
||||
and_filter_iterators();
|
||||
} else {
|
||||
or_filter_iterators();
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
const filter a_filter = filter_node->filter_exp;
|
||||
|
||||
if (!index->field_is_indexed(a_filter.field_name)) {
|
||||
@ -1364,56 +1295,6 @@ void filter_result_iterator_t::skip_to(uint32_t id, const bool& override_timeout
|
||||
seq_id = bool_iterator.seq_id;
|
||||
return;
|
||||
} else if (f.is_string()) {
|
||||
if (filter_node->filter_exp.apply_not_equals) {
|
||||
if (id < seq_id) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (id < result_index) {
|
||||
seq_id = id;
|
||||
return;
|
||||
}
|
||||
|
||||
seq_id = result_index;
|
||||
uint32_t previous_match;
|
||||
|
||||
// Keep ignoring the found gaps till they cannot contain id.
|
||||
do {
|
||||
do {
|
||||
previous_match = seq_id;
|
||||
advance_string_filter_token_iterators();
|
||||
get_string_filter_next_match(f.is_array());
|
||||
} while (validity && previous_match + 1 == seq_id);
|
||||
} while (validity && seq_id <= id);
|
||||
|
||||
if (!validity) {
|
||||
// filter matched all the ids in the index. So for not equals, there's no match.
|
||||
if (previous_match >= index->seq_ids->last_id()) {
|
||||
return;
|
||||
}
|
||||
|
||||
validity = valid;
|
||||
seq_id = previous_match + 1;
|
||||
result_index = index->seq_ids->last_id() + 1;
|
||||
|
||||
// Skip to id, if possible.
|
||||
if (seq_id < id && id < result_index) {
|
||||
seq_id = id;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
result_index = seq_id;
|
||||
seq_id = previous_match + 1;
|
||||
|
||||
if (seq_id < id && id < result_index) {
|
||||
seq_id = id;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip all the token iterators and find a new match.
|
||||
for (auto& filter_value_tokens : posting_list_iterators) {
|
||||
for (auto& token: filter_value_tokens) {
|
||||
@ -1431,8 +1312,8 @@ void filter_result_iterator_t::skip_to(uint32_t id, const bool& override_timeout
|
||||
}
|
||||
}
|
||||
|
||||
int filter_result_iterator_t::is_valid(uint32_t id) {
|
||||
if (validity != valid) {
|
||||
int filter_result_iterator_t::is_valid(uint32_t id, const bool& override_timeout) {
|
||||
if (validity == invalid || (!override_timeout && timeout_info != nullptr && is_timed_out())) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -1442,10 +1323,6 @@ int filter_result_iterator_t::is_valid(uint32_t id) {
|
||||
return validity ? (seq_id == id ? 1 : 0) : -1;
|
||||
}
|
||||
|
||||
if (timeout_info != nullptr && is_timed_out()) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (filter_node->isOperator) {
|
||||
// We need to consider only the valid/invalid state since child nodes can never time out.
|
||||
auto left_validity = left_it->is_valid(id), right_validity = right_it->is_valid(id);
|
||||
@ -1458,21 +1335,19 @@ int filter_result_iterator_t::is_valid(uint32_t id) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// id did not match the filter but both of the sub-iterators are still valid.
|
||||
// Updating seq_id to the next potential match.
|
||||
if (left_validity == 0 && right_validity == 0) {
|
||||
seq_id = std::max(left_it->seq_id, right_it->seq_id);
|
||||
} else if (left_validity == 0) {
|
||||
seq_id = left_it->seq_id;
|
||||
} else {
|
||||
seq_id = right_it->seq_id;
|
||||
}
|
||||
|
||||
seq_id = std::max(left_it->seq_id, right_it->seq_id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
seq_id = id;
|
||||
and_filter_iterators();
|
||||
|
||||
reference.clear();
|
||||
for (const auto& item: left_it->reference) {
|
||||
reference[item.first] = item.second;
|
||||
}
|
||||
for (const auto& item: right_it->reference) {
|
||||
reference[item.first] = item.second;
|
||||
}
|
||||
return 1;
|
||||
} else {
|
||||
validity = (left_it->validity == valid || right_it->validity == valid) ? valid : invalid;
|
||||
@ -1480,28 +1355,63 @@ int filter_result_iterator_t::is_valid(uint32_t id) {
|
||||
if (left_validity < 1 && right_validity < 1) {
|
||||
if (left_validity == -1 && right_validity == -1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// id did not match the filter; both of the sub-iterators or one of them might be valid.
|
||||
// Updating seq_id to the next match.
|
||||
if (left_validity == 0 && right_validity == 0) {
|
||||
seq_id = std::min(left_it->seq_id, right_it->seq_id);
|
||||
} else if (left_validity == 0) {
|
||||
seq_id = left_it->seq_id;
|
||||
} else {
|
||||
} else if (left_validity == -1) {
|
||||
seq_id = right_it->seq_id;
|
||||
return 0;
|
||||
} else if (right_validity == -1) {
|
||||
seq_id = left_it->seq_id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
seq_id = std::min(left_it->seq_id, right_it->seq_id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
seq_id = id;
|
||||
or_filter_iterators();
|
||||
|
||||
reference.clear();
|
||||
if (left_validity == 1) {
|
||||
for (const auto& item: left_it->reference) {
|
||||
reference[item.first] = item.second;
|
||||
}
|
||||
}
|
||||
if (right_validity == 1) {
|
||||
for (const auto& item: right_it->reference) {
|
||||
reference[item.first] = item.second;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_not_equals_iterator) {
|
||||
if (id > last_valid_id) {
|
||||
validity = invalid;
|
||||
return -1;
|
||||
}
|
||||
|
||||
validity = valid;
|
||||
seq_id = id + 1;
|
||||
|
||||
if (!is_equals_iterator_valid || id < equals_iterator_id) {
|
||||
return 1;
|
||||
} else if (id == equals_iterator_id) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
skip_to(id);
|
||||
|
||||
if (is_not_equals_iterator) {
|
||||
validity = valid;
|
||||
seq_id = id + 1;
|
||||
|
||||
if (id == equals_iterator_id) {
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
return validity ? (seq_id == id ? 1 : 0) : -1;
|
||||
}
|
||||
|
||||
@ -1518,40 +1428,66 @@ Option<bool> filter_result_iterator_t::init_status() {
|
||||
}
|
||||
|
||||
bool filter_result_iterator_t::contains_atleast_one(const void *obj) {
|
||||
if (validity != valid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if(IS_COMPACT_POSTING(obj)) {
|
||||
compact_posting_list_t* list = COMPACT_POSTING_PTR(obj);
|
||||
if (list->length == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t i = 0;
|
||||
while(i < list->length && validity == valid) {
|
||||
size_t num_existing_offsets = list->id_offsets[i];
|
||||
size_t existing_id = list->id_offsets[i + num_existing_offsets + 1];
|
||||
size_t num_existing_offsets = list->id_offsets[i];
|
||||
size_t existing_id = list->id_offsets[i + num_existing_offsets + 1];
|
||||
|
||||
if (existing_id == seq_id) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// advance smallest value
|
||||
while (true) {
|
||||
if (existing_id < seq_id) {
|
||||
i += num_existing_offsets + 2;
|
||||
|
||||
if (i >= list->length) {
|
||||
return false;
|
||||
}
|
||||
|
||||
num_existing_offsets = list->id_offsets[i];
|
||||
existing_id = list->id_offsets[i + num_existing_offsets + 1];
|
||||
} else if (existing_id > seq_id) {
|
||||
auto const& result = is_valid(existing_id);
|
||||
|
||||
if (result == 1) {
|
||||
return true;
|
||||
} else if (result == -1) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
skip_to(existing_id);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto list = (posting_list_t*)(obj);
|
||||
posting_list_t::iterator_t it = list->new_iterator();
|
||||
if (!it.valid()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
while(it.valid() && validity == valid) {
|
||||
uint32_t id = it.id();
|
||||
|
||||
if(id == seq_id) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if(id < seq_id) {
|
||||
while (true) {
|
||||
if (it.id() < seq_id) {
|
||||
it.skip_to(seq_id);
|
||||
|
||||
if (!it.valid()) {
|
||||
return false;
|
||||
}
|
||||
} else if (it.id() > seq_id) {
|
||||
auto const& result = is_valid(it.id());
|
||||
|
||||
if (result == 1) {
|
||||
return true;
|
||||
} else if (result == -1) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
skip_to(id);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1632,37 +1568,23 @@ void filter_result_iterator_t::reset(const bool& override_timeout) {
|
||||
}
|
||||
}
|
||||
|
||||
if (a_filter.apply_not_equals &&
|
||||
(index->seq_ids->num_ids() - approx_filter_ids_length) >= string_filter_ids_threshold) {
|
||||
all_seq_ids_iter = index->seq_ids->new_iterator();
|
||||
get_string_filter_next_match(f.is_array());
|
||||
if (is_not_equals_iterator) {
|
||||
seq_id = 0;
|
||||
}
|
||||
|
||||
get_string_filter_first_match(f.is_array());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) {
|
||||
if (validity != valid) {
|
||||
if (!is_filter_result_initialized) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (is_filter_result_initialized) {
|
||||
filter_array = new uint32_t[filter_result.count];
|
||||
std::copy(filter_result.docs, filter_result.docs + filter_result.count, filter_array);
|
||||
return filter_result.count;
|
||||
}
|
||||
|
||||
std::vector<uint32_t> filter_ids;
|
||||
do {
|
||||
filter_ids.push_back(seq_id);
|
||||
next();
|
||||
} while (validity == valid);
|
||||
|
||||
filter_array = new uint32_t[filter_ids.size()];
|
||||
std::copy(filter_ids.begin(), filter_ids.end(), filter_array);
|
||||
|
||||
return filter_ids.size();
|
||||
filter_array = new uint32_t[filter_result.count];
|
||||
std::copy(filter_result.docs, filter_result.docs + filter_result.count, filter_array);
|
||||
return filter_result.count;
|
||||
}
|
||||
|
||||
uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& lenA, uint32_t*& results) {
|
||||
@ -1676,14 +1598,13 @@ uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t&
|
||||
|
||||
std::vector<uint32_t> filter_ids;
|
||||
for (uint32_t i = 0; i < lenA; i++) {
|
||||
auto result = is_valid(A[i]);
|
||||
|
||||
if (result == -1) {
|
||||
break;
|
||||
}
|
||||
auto const& id = A[i];
|
||||
auto const& result = is_valid(id);
|
||||
|
||||
if (result == 1) {
|
||||
filter_ids.push_back(A[i]);
|
||||
filter_ids.push_back(id);
|
||||
} else if (result == -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1710,14 +1631,13 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
|
||||
|
||||
std::vector<uint32_t> filter_ids;
|
||||
for (uint32_t i = 0; i < lenA; i++) {
|
||||
auto _result = is_valid(A[i]);
|
||||
|
||||
if (_result == -1) {
|
||||
break;
|
||||
}
|
||||
auto const& id = A[i];
|
||||
auto const& _result = is_valid(id);
|
||||
|
||||
if (_result == 1) {
|
||||
filter_ids.push_back(A[i]);
|
||||
filter_ids.push_back(id);
|
||||
} else if (_result == -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1739,12 +1659,10 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
|
||||
for (uint32_t i = 0; i < lenA; i++) {
|
||||
auto _result = is_valid(A[i]);
|
||||
|
||||
if (_result == -1) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (_result == 1) {
|
||||
match_indexes.push_back(result_index);
|
||||
} else if (_result == -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1987,25 +1905,15 @@ filter_result_iterator_t::filter_result_iterator_t(uint32_t* ids, const uint32_t
|
||||
|
||||
void filter_result_iterator_t::add_phrase_ids(filter_result_iterator_t*& fit,
|
||||
uint32_t* phrase_result_ids, const uint32_t& phrase_result_count) {
|
||||
fit->reset();
|
||||
|
||||
auto root_iterator = new filter_result_iterator_t(std::min(phrase_result_count, fit->approx_filter_ids_length));
|
||||
root_iterator->left_it = new filter_result_iterator_t(phrase_result_ids, phrase_result_count);
|
||||
root_iterator->right_it = fit;
|
||||
|
||||
auto& left_it = root_iterator->left_it;
|
||||
auto& right_it = root_iterator->right_it;
|
||||
|
||||
while (left_it->validity && right_it->validity && left_it->seq_id != right_it->seq_id) {
|
||||
if (left_it->seq_id < right_it->seq_id) {
|
||||
left_it->skip_to(right_it->seq_id);
|
||||
} else {
|
||||
right_it->skip_to(left_it->seq_id);
|
||||
}
|
||||
}
|
||||
|
||||
root_iterator->timeout_info = std::move(fit->timeout_info);
|
||||
root_iterator->validity = (left_it->validity == timed_out || right_it->validity == timed_out) ? timed_out :
|
||||
(left_it->validity == invalid || right_it->validity == invalid) ? invalid : valid;
|
||||
root_iterator->seq_id = left_it->seq_id;
|
||||
|
||||
root_iterator->and_filter_iterators();
|
||||
|
||||
fit = root_iterator;
|
||||
}
|
||||
|
||||
|
@ -2927,34 +2927,36 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
|
||||
std::vector<std::pair<float, single_filter_result_t>> dist_results;
|
||||
|
||||
uint32_t filter_id_count = 0;
|
||||
while (!no_filters_provided &&
|
||||
filter_id_count < vector_query.flat_search_cutoff && filter_result_iterator->validity == filter_result_iterator_t::valid) {
|
||||
auto& seq_id = filter_result_iterator->seq_id;
|
||||
auto filter_result = single_filter_result_t(seq_id, std::move(filter_result_iterator->reference));
|
||||
filter_result_iterator->next();
|
||||
std::vector<float> values;
|
||||
filter_result_iterator->compute_iterators();
|
||||
|
||||
try {
|
||||
values = field_vector_index->vecdex->getDataByLabel<float>(seq_id);
|
||||
} catch(...) {
|
||||
// likely not found
|
||||
continue;
|
||||
uint32_t filter_id_count = filter_result_iterator->approx_filter_ids_length;
|
||||
if (!no_filters_provided && filter_id_count < vector_query.flat_search_cutoff) {
|
||||
while (filter_result_iterator->validity == filter_result_iterator_t::valid) {
|
||||
auto &seq_id = filter_result_iterator->seq_id;
|
||||
auto filter_result = single_filter_result_t(seq_id, std::move(filter_result_iterator->reference));
|
||||
filter_result_iterator->next();
|
||||
std::vector<float> values;
|
||||
|
||||
try {
|
||||
values = field_vector_index->vecdex->getDataByLabel<float>(seq_id);
|
||||
} catch (...) {
|
||||
// likely not found
|
||||
continue;
|
||||
}
|
||||
|
||||
float dist;
|
||||
if (field_vector_index->distance_type == cosine) {
|
||||
std::vector<float> normalized_q(vector_query.values.size());
|
||||
hnsw_index_t::normalize_vector(vector_query.values, normalized_q);
|
||||
dist = field_vector_index->space->get_dist_func()(normalized_q.data(), values.data(),
|
||||
&field_vector_index->num_dim);
|
||||
} else {
|
||||
dist = field_vector_index->space->get_dist_func()(vector_query.values.data(), values.data(),
|
||||
&field_vector_index->num_dim);
|
||||
}
|
||||
|
||||
dist_results.emplace_back(dist, filter_result);
|
||||
}
|
||||
|
||||
float dist;
|
||||
if(field_vector_index->distance_type == cosine) {
|
||||
std::vector<float> normalized_q(vector_query.values.size());
|
||||
hnsw_index_t::normalize_vector(vector_query.values, normalized_q);
|
||||
dist = field_vector_index->space->get_dist_func()(normalized_q.data(), values.data(),
|
||||
&field_vector_index->num_dim);
|
||||
} else {
|
||||
dist = field_vector_index->space->get_dist_func()(vector_query.values.data(), values.data(),
|
||||
&field_vector_index->num_dim);
|
||||
}
|
||||
|
||||
dist_results.emplace_back(dist, filter_result);
|
||||
filter_id_count++;
|
||||
}
|
||||
filter_result_iterator->reset();
|
||||
search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out;
|
||||
@ -2989,9 +2991,12 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
search_cutoff = true;
|
||||
}
|
||||
|
||||
// The doc_id must be valid otherwise it would've been filtered out upstream.
|
||||
filter_result_iterator->skip_to(pair.second, search_cutoff);
|
||||
auto filter_result = single_filter_result_t(pair.second,
|
||||
auto const& seq_id = pair.second;
|
||||
if (filter_result_iterator->is_valid(seq_id, search_cutoff) != 1) {
|
||||
continue;
|
||||
}
|
||||
// The seq_id must be valid otherwise it would've been filtered out upstream.
|
||||
auto filter_result = single_filter_result_t(seq_id,
|
||||
std::move(filter_result_iterator->reference));
|
||||
dist_results.emplace_back(pair.first, filter_result);
|
||||
}
|
||||
@ -3447,7 +3452,9 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
auto& vec_result = vec_results[res_index];
|
||||
auto seq_id = vec_result.first;
|
||||
|
||||
filter_result_iterator->skip_to(seq_id);
|
||||
if (!no_filters_provided && filter_result_iterator->is_valid(seq_id) != 1) {
|
||||
continue;
|
||||
}
|
||||
auto references = std::move(filter_result_iterator->reference);
|
||||
filter_result_iterator->reset();
|
||||
|
||||
@ -5404,6 +5411,7 @@ Option<bool> Index::do_phrase_search(const size_t num_search_fields, const std::
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
filter_result_iterator->compute_iterators();
|
||||
all_result_ids_len = filter_result_iterator->to_filter_id_array(all_result_ids);
|
||||
filter_result_iterator->reset();
|
||||
|
||||
@ -6147,6 +6155,8 @@ void Index::populate_sort_mapping(int* sort_order, std::vector<size_t>& geopoint
|
||||
if (!filter_init_op.ok()) {
|
||||
return;
|
||||
}
|
||||
|
||||
filter_result_iterator.compute_iterators();
|
||||
uint32_t* eval_ids = nullptr;
|
||||
auto eval_ids_count = filter_result_iterator.to_filter_id_array(eval_ids);
|
||||
|
||||
|
@ -209,12 +209,7 @@ bool or_iterator_t::take_id(result_iter_state_t& istate, uint32_t id, bool& is_e
|
||||
}
|
||||
|
||||
if (istate.fit != nullptr && istate.fit->approx_filter_ids_length > 0) {
|
||||
if (istate.fit->is_valid(id) == 1) {
|
||||
istate.fit->next();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
return istate.fit->is_valid(id) == 1;
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -264,6 +259,7 @@ bool or_iterator_t::take_id(result_iter_state_t& istate, uint32_t id, bool& is_e
|
||||
if (istate.fit->is_valid(id) == 1) {
|
||||
filter_result.seq_id = id;
|
||||
filter_result.reference_filter_results = std::move(istate.fit->reference);
|
||||
|
||||
istate.fit->next();
|
||||
return true;
|
||||
}
|
||||
|
@ -181,51 +181,6 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_not_equals_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
filter_op = filter::parse_filter_query("tags: gold", coll->get_schema(), store, doc_id_prefix,
|
||||
filter_tree_root);
|
||||
ASSERT_TRUE(filter_op.ok());
|
||||
|
||||
auto iter_skip_test1 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_skip_test1.init_status().ok());
|
||||
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test1.validity);
|
||||
iter_skip_test1.skip_to(3);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test1.validity);
|
||||
ASSERT_EQ(4, iter_skip_test1.seq_id);
|
||||
iter_skip_test1.next();
|
||||
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_skip_test1.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
filter_op = filter::parse_filter_query("tags: != silver", coll->get_schema(), store, doc_id_prefix,
|
||||
filter_tree_root);
|
||||
ASSERT_TRUE(filter_op.ok());
|
||||
|
||||
auto iter_skip_test2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_skip_test2.init_status().ok());
|
||||
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test2.validity);
|
||||
iter_skip_test2.skip_to(3);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_skip_test2.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
filter_op = filter::parse_filter_query("name: jeremy && tags: fine platinum", coll->get_schema(), store, doc_id_prefix,
|
||||
filter_tree_root);
|
||||
ASSERT_TRUE(filter_op.ok());
|
||||
|
||||
auto iter_and_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_and_test.init_status().ok());
|
||||
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_and_test.validity);
|
||||
ASSERT_EQ(1, iter_and_test.seq_id);
|
||||
iter_and_test.next();
|
||||
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_and_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
filter_op = filter::parse_filter_query("name: James || tags: bronze", coll->get_schema(), store, doc_id_prefix,
|
||||
@ -261,20 +216,21 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
filter_tree_root);
|
||||
ASSERT_TRUE(filter_op.ok());
|
||||
|
||||
auto iter_skip_complex_filter_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_skip_complex_filter_test.init_status().ok());
|
||||
auto iter_complex_filter_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_complex_filter_test.init_status().ok());
|
||||
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_complex_filter_test.validity);
|
||||
iter_skip_complex_filter_test.skip_to(4);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_complex_filter_test.validity);
|
||||
ASSERT_EQ(0, iter_complex_filter_test.is_valid(3));
|
||||
ASSERT_EQ(4, iter_complex_filter_test.seq_id);
|
||||
|
||||
expected = {4, 5};
|
||||
for (auto const& i : expected) {
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_complex_filter_test.validity);
|
||||
ASSERT_EQ(i, iter_skip_complex_filter_test.seq_id);
|
||||
iter_skip_complex_filter_test.next();
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_complex_filter_test.validity);
|
||||
ASSERT_EQ(i, iter_complex_filter_test.seq_id);
|
||||
iter_complex_filter_test.next();
|
||||
}
|
||||
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_skip_complex_filter_test.validity);
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_complex_filter_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -285,7 +241,8 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
auto iter_validate_ids_test1 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_validate_ids_test1.init_status().ok());
|
||||
|
||||
std::vector<int> validate_ids = {0, 1, 2, 3, 4, 5, 6}, seq_ids = {0, 2, 2, 4, 4, 5, 5};
|
||||
std::vector<int> validate_ids = {0, 1, 2, 3, 4, 5, 6};
|
||||
std::vector<int> seq_ids = {0, 2, 2, 4, 4, 5, 5};
|
||||
expected = {1, 0, 1, 0, 1, 1, -1};
|
||||
for (uint32_t i = 0; i < validate_ids.size(); i++) {
|
||||
ASSERT_EQ(expected[i], iter_validate_ids_test1.is_valid(validate_ids[i]));
|
||||
@ -324,24 +281,6 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
ASSERT_EQ(seq_ids[i], iter_validate_ids_test3.seq_id);
|
||||
}
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
filter_op = filter::parse_filter_query("name: James || tags: != gold", coll->get_schema(), store, doc_id_prefix,
|
||||
filter_tree_root);
|
||||
ASSERT_TRUE(filter_op.ok());
|
||||
|
||||
auto iter_validate_ids_not_equals_filter_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(),
|
||||
filter_tree_root);
|
||||
ASSERT_TRUE(iter_validate_ids_not_equals_filter_test.init_status().ok());
|
||||
|
||||
validate_ids = {0, 1, 2, 3, 4, 5, 6};
|
||||
seq_ids = {1, 1, 3, 3, 5, 5, 5};
|
||||
expected = {0, 1, 0, 1, 0, 1, -1};
|
||||
for (uint32_t i = 0; i < validate_ids.size(); i++) {
|
||||
ASSERT_EQ(expected[i], iter_validate_ids_not_equals_filter_test.is_valid(validate_ids[i]));
|
||||
ASSERT_EQ(seq_ids[i], iter_validate_ids_not_equals_filter_test.seq_id);
|
||||
}
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
filter_op = filter::parse_filter_query("tags: gold", coll->get_schema(), store, doc_id_prefix,
|
||||
@ -377,7 +316,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
ASSERT_TRUE(iter_plist_contains_atleast_one_test1.init_status().ok());
|
||||
|
||||
posting_list_t p_list1(2);
|
||||
ids = {1, 3, 5};
|
||||
ids = {1, 3};
|
||||
for (const auto &i: ids) {
|
||||
p_list1.upsert(i, {1, 2, 3});
|
||||
}
|
||||
@ -447,6 +386,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
uint32_t* filter_ids = nullptr;
|
||||
uint32_t filter_ids_length;
|
||||
|
||||
iter_to_array_test.compute_iterators();
|
||||
filter_ids_length = iter_to_array_test.to_filter_id_array(filter_ids);
|
||||
ASSERT_EQ(3, filter_ids_length);
|
||||
|
||||
@ -454,7 +394,6 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
for (uint32_t i = 0; i < filter_ids_length; i++) {
|
||||
ASSERT_EQ(expected[i], filter_ids[i]);
|
||||
}
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_to_array_test.validity);
|
||||
|
||||
delete[] filter_ids;
|
||||
|
||||
@ -487,33 +426,8 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
ASSERT_TRUE(add_op.ok());
|
||||
|
||||
filter_tree_root = nullptr;
|
||||
filter_op = filter::parse_filter_query("tags: != FINE PLATINUM", coll->get_schema(), store, doc_id_prefix,
|
||||
filter_op = filter::parse_filter_query("tags: bronze", coll->get_schema(), store, doc_id_prefix,
|
||||
filter_tree_root);
|
||||
ASSERT_TRUE(filter_op.ok());
|
||||
|
||||
auto iter_skip_test3 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_skip_test3.init_status().ok());
|
||||
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test3.validity);
|
||||
iter_skip_test3.skip_to(4);
|
||||
ASSERT_EQ(4, iter_skip_test3.seq_id);
|
||||
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test3.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
|
||||
filter_tree_root = nullptr;
|
||||
filter_op = filter::parse_filter_query("tags: != gold", coll->get_schema(), store, doc_id_prefix,
|
||||
filter_tree_root);
|
||||
ASSERT_TRUE(filter_op.ok());
|
||||
|
||||
auto iter_skip_test4 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_skip_test4.init_status().ok());
|
||||
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test4.validity);
|
||||
iter_skip_test4.skip_to(6);
|
||||
ASSERT_EQ(6, iter_skip_test4.seq_id);
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test4.validity);
|
||||
|
||||
auto iter_add_phrase_ids_test = new filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
std::unique_ptr<filter_result_iterator_t> filter_iter_guard(iter_add_phrase_ids_test);
|
||||
@ -528,7 +442,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
filter_iter_guard.reset(iter_add_phrase_ids_test);
|
||||
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_add_phrase_ids_test->validity);
|
||||
ASSERT_EQ(6, iter_add_phrase_ids_test->seq_id);
|
||||
ASSERT_EQ(2, iter_add_phrase_ids_test->seq_id);
|
||||
delete filter_tree_root;
|
||||
|
||||
filter_tree_root = nullptr;
|
||||
@ -605,26 +519,6 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_equals_test_2.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
|
||||
filter_tree_root = nullptr;
|
||||
filter_op = filter::parse_filter_query("tags: != gold", coll->get_schema(), store, doc_id_prefix,
|
||||
filter_tree_root);
|
||||
ASSERT_TRUE(filter_op.ok());
|
||||
|
||||
auto iter_string_not_equals_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_string_not_equals_test.init_status().ok());
|
||||
ASSERT_FALSE(iter_string_not_equals_test._get_is_filter_result_initialized());
|
||||
|
||||
expected = {1, 3, 5, 6};
|
||||
for (auto const& i : expected) {
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_string_not_equals_test.validity);
|
||||
ASSERT_EQ(i, iter_string_not_equals_test.seq_id);
|
||||
iter_string_not_equals_test.next();
|
||||
}
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_not_equals_test.validity);
|
||||
|
||||
delete filter_tree_root;
|
||||
|
||||
filter_tree_root = nullptr;
|
||||
filter_op = filter::parse_filter_query("tags: != [gold, silver]", coll->get_schema(), store, doc_id_prefix,
|
||||
filter_tree_root);
|
||||
@ -656,34 +550,6 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
|
||||
delete filter_tree_root;
|
||||
|
||||
filter_tree_root = nullptr;
|
||||
filter_op = filter::parse_filter_query("name: != James Rowdy", coll->get_schema(), store, doc_id_prefix,
|
||||
filter_tree_root);
|
||||
ASSERT_TRUE(filter_op.ok());
|
||||
|
||||
auto iter_string_not_equals_test_3 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_string_not_equals_test_3.init_status().ok());
|
||||
ASSERT_FALSE(iter_string_not_equals_test_3._get_is_filter_result_initialized());
|
||||
|
||||
expected = {1, 3, 4};
|
||||
for (auto const& i : expected) {
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_string_not_equals_test_3.validity);
|
||||
ASSERT_EQ(i, iter_string_not_equals_test_3.seq_id);
|
||||
iter_string_not_equals_test_3.next();
|
||||
}
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_not_equals_test_3.validity);
|
||||
|
||||
iter_string_not_equals_test_3.reset();
|
||||
|
||||
expected = {1, 3, 4};
|
||||
for (auto const& i : expected) {
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_string_not_equals_test_3.validity);
|
||||
ASSERT_EQ(i, iter_string_not_equals_test_3.seq_id);
|
||||
iter_string_not_equals_test_3.next();
|
||||
}
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_not_equals_test_3.validity);
|
||||
delete filter_tree_root;
|
||||
|
||||
Collection *bool_coll;
|
||||
|
||||
std::vector<field> fields = {field("title", field_types::STRING, false),
|
||||
@ -753,7 +619,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_boolean_test_2.validity);
|
||||
|
||||
iter_boolean_test_2.reset();
|
||||
iter_boolean_test_2.skip_to(6);
|
||||
ASSERT_EQ(0, iter_boolean_test_2.is_valid(6));
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_boolean_test_2.validity);
|
||||
ASSERT_EQ(7, iter_boolean_test_2.seq_id);
|
||||
|
||||
@ -803,13 +669,23 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
auto iter_string_prefix_value_test_2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_string_prefix_value_test_2.init_status().ok());
|
||||
ASSERT_FALSE(iter_string_prefix_value_test_2._get_is_filter_result_initialized());
|
||||
ASSERT_EQ(3, iter_string_prefix_value_test_2.approx_filter_ids_length); // document 0 and 2 have been deleted.
|
||||
ASSERT_EQ(4, iter_string_prefix_value_test_2.approx_filter_ids_length); // 7 total docs, 3 approx count for equals.
|
||||
|
||||
expected = {1, 3, 5, 6, 7};
|
||||
for (auto const& i : expected) {
|
||||
validate_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
|
||||
seq_ids = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
|
||||
expected = {1, 1, 1, 1, 0, 1, 1, 1, 0, -1};
|
||||
std::vector<uint32_t > equals_match_seq_ids = {4, 4, 4, 4, 4, 8, 8, 8, 8, 8};
|
||||
std::vector<bool> equals_iterator_valid = {true, true, true, true, true, true, true, true, true, true};
|
||||
for (uint32_t i = 0; i < validate_ids.size(); i++) {
|
||||
ASSERT_EQ(filter_result_iterator_t::valid, iter_string_prefix_value_test_2.validity);
|
||||
ASSERT_EQ(i, iter_string_prefix_value_test_2.seq_id);
|
||||
iter_string_prefix_value_test_2.next();
|
||||
ASSERT_EQ(expected[i], iter_string_prefix_value_test_2.is_valid(validate_ids[i]));
|
||||
ASSERT_EQ(equals_match_seq_ids[i], iter_string_prefix_value_test_2._get_equals_iterator_id());
|
||||
ASSERT_EQ(equals_iterator_valid[i], iter_string_prefix_value_test_2._get_is_equals_iterator_valid());
|
||||
|
||||
if (expected[i] == 1) {
|
||||
iter_string_prefix_value_test_2.next();
|
||||
}
|
||||
ASSERT_EQ(seq_ids[i], iter_string_prefix_value_test_2.seq_id);
|
||||
}
|
||||
ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_prefix_value_test_2.validity);
|
||||
|
||||
@ -938,3 +814,263 @@ TEST_F(FilterTest, FilterTreeInitialization) {
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
}
|
||||
|
||||
/// Exercises `filter_result_iterator_t` with string "not equals" (`!=`) filters on the `tags` field:
/// a single value, a multi-token value, an array of values, the same filter after more documents are
/// added, after `reset()`, and combined with other filters through `||` and `&&`.
///
/// Most sections follow the same probing pattern, driven by parallel vectors:
///   - `validate_ids`          : ids passed to `is_valid(id)`, in increasing order.
///   - `expected`              : asserted return value of `is_valid(id)` for each probed id. The values 1, 0 and -1
///                               appear; presumably 1 = match, 0 = no match, -1 = iterator exhausted -- confirm
///                               against `filter_result_iterator_t::is_valid`.
///   - `seq_ids`               : asserted `seq_id` of the iterator after the probe (and after `next()` when the probe
///                               returned 1).
///   - `equals_match_seq_ids`  : asserted value of `_get_equals_iterator_id()` after the probe.
///   - `equals_iterator_valid` : asserted value of `_get_is_equals_iterator_valid()` after the probe.
/// After the last probe of every section the iterator is asserted to be `invalid`.
TEST_F(FilterTest, NotEqualsStringFilter) {
    nlohmann::json schema =
            R"({
                "name": "Collection",
                "fields": [
                    {"name": "name", "type": "string"},
                    {"name": "tags", "type": "string[]"}
                ]
            })"_json;

    Collection* coll = collectionManager.create_collection(schema).get();

    // Seed the collection from the shared fixture file, one JSON document per line.
    std::ifstream infile(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
    std::string json_line;
    while (std::getline(infile, json_line)) {
        auto add_op = coll->add(json_line);
        ASSERT_TRUE(add_op.ok());
    }
    infile.close();

    const std::string doc_id_prefix = std::to_string(coll->get_collection_id()) + "_" + Collection::DOC_ID_PREFIX + "_";
    filter_node_t* filter_tree_root = nullptr;

    // Case 1: `tags:!= gold`. The filter result is asserted to be initialized right after construction,
    // and plain iteration is asserted to yield exactly the ids 1 and 3.
    Option<bool> filter_op = filter::parse_filter_query("tags:!= gold", coll->get_schema(), store, doc_id_prefix,
                                                        filter_tree_root);
    ASSERT_TRUE(filter_op.ok());

    auto computed_not_equals_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
    ASSERT_TRUE(computed_not_equals_test.init_status().ok());
    ASSERT_TRUE(computed_not_equals_test._get_is_filter_result_initialized());

    std::vector<int> expected = {1, 3};
    for (auto const& i : expected) {
        ASSERT_EQ(filter_result_iterator_t::valid, computed_not_equals_test.validity);
        ASSERT_EQ(i, computed_not_equals_test.seq_id);
        computed_not_equals_test.next();
    }
    ASSERT_EQ(filter_result_iterator_t::invalid, computed_not_equals_test.validity);

    // Case 2: `tags: != fine platinum`. Here the filter result is asserted NOT to be initialized, so the
    // iterator is driven through `is_valid(id)` and the state of the underlying equals iterator is checked too.
    delete filter_tree_root;
    filter_tree_root = nullptr;
    filter_op = filter::parse_filter_query("tags: != fine platinum", coll->get_schema(), store, doc_id_prefix,
                                           filter_tree_root);
    ASSERT_TRUE(filter_op.ok());

    auto iter_string_not_equals_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
    ASSERT_TRUE(iter_string_not_equals_test.init_status().ok());
    ASSERT_FALSE(iter_string_not_equals_test._get_is_filter_result_initialized());

    std::vector<uint32_t> validate_ids = {0, 1, 2, 3, 4, 5};
    std::vector<uint32_t> seq_ids = {1, 2, 3, 4, 5, 5};
    std::vector<uint32_t> equals_match_seq_ids = {1, 1, 1, 1, 1, 1};
    std::vector<bool> equals_iterator_valid = {true, true, false, false, false, false};
    expected = {1, 0, 1, 1, 1, -1};
    for (uint32_t i = 0; i < validate_ids.size(); i++) {
        ASSERT_EQ(filter_result_iterator_t::valid, iter_string_not_equals_test.validity);
        ASSERT_EQ(expected[i], iter_string_not_equals_test.is_valid(validate_ids[i]));
        ASSERT_EQ(equals_match_seq_ids[i], iter_string_not_equals_test._get_equals_iterator_id());
        ASSERT_EQ(equals_iterator_valid[i], iter_string_not_equals_test._get_is_equals_iterator_valid());

        // Only step forward explicitly when the probed id was reported as 1.
        if (expected[i] == 1) {
            iter_string_not_equals_test.next();
        }
        ASSERT_EQ(seq_ids[i], iter_string_not_equals_test.seq_id);
    }
    ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_not_equals_test.validity);

    // Case 3: `!=` against an array of values. The approximate id count is asserted to be 5 and only id 1
    // is reported as 1 by `is_valid`.
    delete filter_tree_root;
    filter_tree_root = nullptr;
    filter_op = filter::parse_filter_query("tags: != [gold, silver]", coll->get_schema(), store, doc_id_prefix,
                                           filter_tree_root);
    ASSERT_TRUE(filter_op.ok());
    auto iter_string_array_not_equals_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
    ASSERT_TRUE(iter_string_array_not_equals_test.init_status().ok());
    ASSERT_FALSE(iter_string_array_not_equals_test._get_is_filter_result_initialized());
    ASSERT_EQ(5, iter_string_array_not_equals_test.approx_filter_ids_length);

    validate_ids = {0, 1, 2, 3, 4, 5};
    seq_ids = {1, 2, 3, 4, 5, 5};
    expected = {0, 1, 0, 0, 0, -1};
    for (uint32_t i = 0; i < validate_ids.size(); i++) {
        ASSERT_EQ(filter_result_iterator_t::valid, iter_string_array_not_equals_test.validity);
        ASSERT_EQ(expected[i], iter_string_array_not_equals_test.is_valid(validate_ids[i]));

        if (expected[i] == 1) {
            iter_string_array_not_equals_test.next();
        }
        ASSERT_EQ(seq_ids[i], iter_string_array_not_equals_test.seq_id);
    }
    ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_array_not_equals_test.validity);

    delete filter_tree_root;
    filter_tree_root = nullptr;

    // Add three more documents (two tagged "copper", one tagged "gold") so that the remaining cases
    // probe a larger id range. NOTE(review): the probes below go up to id 8, which suggests these
    // documents receive seq ids 5, 6 and 7 -- confirm against the fixture file.
    auto docs = {
            R"({
                "name": "James Rowdy",
                "tags": ["copper"]
            })"_json,
            R"({
                "name": "James Rowdy",
                "tags": ["copper"]
            })"_json,
            R"({
                "name": "James Rowdy",
                "tags": ["gold"]
            })"_json
    };

    for (auto const& doc: docs) {
        auto add_op = coll->add(doc.dump());
        ASSERT_TRUE(add_op.ok());
    }

    // Case 4: `tags: != gold` on the enlarged collection. The result is now asserted NOT to be
    // initialized up front (contrast with case 1).
    filter_op = filter::parse_filter_query("tags: != gold", coll->get_schema(), store, doc_id_prefix,
                                           filter_tree_root);
    ASSERT_TRUE(filter_op.ok());

    auto iter_string_not_equals_test_2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
    ASSERT_TRUE(iter_string_not_equals_test_2.init_status().ok());
    ASSERT_FALSE(iter_string_not_equals_test_2._get_is_filter_result_initialized());

    validate_ids = {1, 2, 3, 4, 5, 6, 7, 8};
    seq_ids = {2, 3, 4, 5, 6, 7, 8, 8};
    expected = {1, 0, 1, 0, 1, 1, 0, -1};
    equals_match_seq_ids = {2, 2, 4, 4, 7, 7, 7, 7};
    equals_iterator_valid = {true, true, true, true, true, true, true, true};
    for (uint32_t i = 0; i < validate_ids.size(); i++) {
        ASSERT_EQ(filter_result_iterator_t::valid, iter_string_not_equals_test_2.validity);
        ASSERT_EQ(expected[i], iter_string_not_equals_test_2.is_valid(validate_ids[i]));
        ASSERT_EQ(equals_match_seq_ids[i], iter_string_not_equals_test_2._get_equals_iterator_id());
        ASSERT_EQ(equals_iterator_valid[i], iter_string_not_equals_test_2._get_is_equals_iterator_valid());

        if (expected[i] == 1) {
            iter_string_not_equals_test_2.next();
        }
        ASSERT_EQ(seq_ids[i], iter_string_not_equals_test_2.seq_id);
    }
    ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_not_equals_test_2.validity);

    // Case 5: the same iterator after `reset()`, this time probing a sparse set of ids.
    iter_string_not_equals_test_2.reset();
    validate_ids = {2, 5, 7, 8};
    seq_ids = {3, 6, 8, 8};
    expected = {0, 1, 0, -1};
    equals_match_seq_ids = {2, 7, 7, 7};
    equals_iterator_valid = {true, true, true, true};
    for (uint32_t i = 0; i < validate_ids.size(); i++) {
        ASSERT_EQ(filter_result_iterator_t::valid, iter_string_not_equals_test_2.validity);
        ASSERT_EQ(expected[i], iter_string_not_equals_test_2.is_valid(validate_ids[i]));
        ASSERT_EQ(equals_match_seq_ids[i], iter_string_not_equals_test_2._get_equals_iterator_id());
        ASSERT_EQ(equals_iterator_valid[i], iter_string_not_equals_test_2._get_is_equals_iterator_valid());

        if (expected[i] == 1) {
            iter_string_not_equals_test_2.next();
        }
        ASSERT_EQ(seq_ids[i], iter_string_not_equals_test_2.seq_id);
    }
    ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_not_equals_test_2.validity);

    delete filter_tree_root;
    filter_tree_root = nullptr;

    // Case 6: equals filter OR-ed with a not-equals filter.
    filter_op = filter::parse_filter_query("name: James || tags: != bronze", coll->get_schema(), store, doc_id_prefix,
                                           filter_tree_root);
    ASSERT_TRUE(filter_op.ok());

    auto iter_not_equals_or_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(),
                                                            filter_tree_root);
    ASSERT_TRUE(iter_not_equals_or_test.init_status().ok());
    ASSERT_FALSE(iter_not_equals_or_test._get_is_filter_result_initialized());

    validate_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8};
    seq_ids = {1, 2, 3, 4, 5, 6, 7, 8, 8};
    expected = {1, 1, 0, 1, 0, 1, 1, 1, -1};
    for (uint32_t i = 0; i < validate_ids.size(); i++) {
        ASSERT_EQ(filter_result_iterator_t::valid, iter_not_equals_or_test.validity);
        ASSERT_EQ(expected[i], iter_not_equals_or_test.is_valid(validate_ids[i]));

        if (expected[i] == 1) {
            iter_not_equals_or_test.next();
        }
        ASSERT_EQ(seq_ids[i], iter_not_equals_or_test.seq_id);
    }
    ASSERT_EQ(filter_result_iterator_t::invalid, iter_not_equals_or_test.validity);

    // Case 7: two not-equals filters OR-ed together.
    delete filter_tree_root;
    filter_tree_root = nullptr;
    filter_op = filter::parse_filter_query("tags: != silver || tags: != gold", coll->get_schema(), store, doc_id_prefix,
                                           filter_tree_root);
    ASSERT_TRUE(filter_op.ok());

    auto iter_not_equals_or_test_2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(),
                                                              filter_tree_root);
    ASSERT_TRUE(iter_not_equals_or_test_2.init_status().ok());

    validate_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8};
    seq_ids = {1, 2, 3, 4, 5, 6, 7, 8, 8};
    expected = {0, 1, 1, 1, 0, 1, 1, 1, -1};
    for (uint32_t i = 0; i < validate_ids.size(); i++) {
        ASSERT_EQ(filter_result_iterator_t::valid, iter_not_equals_or_test_2.validity);
        ASSERT_EQ(expected[i], iter_not_equals_or_test_2.is_valid(validate_ids[i]));

        if (expected[i] == 1) {
            iter_not_equals_or_test_2.next();
        }
        ASSERT_EQ(seq_ids[i], iter_not_equals_or_test_2.seq_id);
    }
    ASSERT_EQ(filter_result_iterator_t::invalid, iter_not_equals_or_test_2.validity);

    // Case 8: equals filter AND-ed with a not-equals filter; only ids 5..8 are probed.
    delete filter_tree_root;
    filter_tree_root = nullptr;
    filter_op = filter::parse_filter_query("name: James && tags: != gold", coll->get_schema(), store, doc_id_prefix,
                                           filter_tree_root);
    ASSERT_TRUE(filter_op.ok());

    auto iter_not_equals_and_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(),
                                                             filter_tree_root);
    ASSERT_TRUE(iter_not_equals_and_test.init_status().ok());

    validate_ids = {5, 6, 7, 8};
    seq_ids = {6, 7, 8, 8};
    expected = {1, 1, 0, -1};
    for (uint32_t i = 0; i < validate_ids.size(); i++) {
        ASSERT_EQ(filter_result_iterator_t::valid, iter_not_equals_and_test.validity);
        ASSERT_EQ(expected[i], iter_not_equals_and_test.is_valid(validate_ids[i]));

        if (expected[i] == 1) {
            iter_not_equals_and_test.next();
        }
        ASSERT_EQ(seq_ids[i], iter_not_equals_and_test.seq_id);
    }
    ASSERT_EQ(filter_result_iterator_t::invalid, iter_not_equals_and_test.validity);

    // Case 9: two not-equals filters AND-ed together.
    delete filter_tree_root;
    filter_tree_root = nullptr;
    filter_op = filter::parse_filter_query("tags: != silver && tags: != gold", coll->get_schema(), store, doc_id_prefix,
                                           filter_tree_root);
    ASSERT_TRUE(filter_op.ok());

    auto iter_not_equals_and_test_2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(),
                                                               filter_tree_root);
    ASSERT_TRUE(iter_not_equals_and_test_2.init_status().ok());

    validate_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8};
    seq_ids = {1, 2, 3, 4, 5, 6, 7, 8, 8};
    expected = {0, 1, 0, 0, 0, 1, 1, 0, -1};
    for (uint32_t i = 0; i < validate_ids.size(); i++) {
        ASSERT_EQ(filter_result_iterator_t::valid, iter_not_equals_and_test_2.validity);
        ASSERT_EQ(expected[i], iter_not_equals_and_test_2.is_valid(validate_ids[i]));

        if (expected[i] == 1) {
            iter_not_equals_and_test_2.next();
        }
        ASSERT_EQ(seq_ids[i], iter_not_equals_and_test_2.seq_id);
    }
    ASSERT_EQ(filter_result_iterator_t::invalid, iter_not_equals_and_test_2.validity);

    delete filter_tree_root;
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user