diff --git a/include/filter_result_iterator.h b/include/filter_result_iterator.h index 26028074..0e7e0467 100644 --- a/include/filter_result_iterator.h +++ b/include/filter_result_iterator.h @@ -254,11 +254,10 @@ private: std::vector> posting_list_iterators; std::vector expanded_plists; - /// Used in case of a not equals string filter. - /// The iterative logic to find not equals match is to return the ids that occur in between the equals match. This - /// might lead to returning some ids that are deleted. So we use this iterator to check and return only the ids that - /// exist in `index->seq_ids`. - id_list_t::iterator_t all_seq_ids_iter = id_list_t::iterator_t(nullptr, nullptr, nullptr, false); + bool is_not_equals_iterator = false; + uint32_t equals_iterator_id = 0; + bool is_equals_iterator_valid = true; + uint32_t last_valid_id = 0; /// Used in case of a single boolean filter matching more than `bool_filter_ids_threshold` ids. num_tree_t::iterator_t bool_iterator = num_tree_t::iterator_t(nullptr, NUM_COMPARATOR::EQUALS, 0); @@ -279,10 +278,6 @@ private: /// Advances all the token iterators that are at seq_id and finds the next intersection. void advance_string_filter_token_iterators(); - /// Finds the first match for a filter on string field. Only used in `init()` and `reset()`. Handles `!` in string - /// filter. - void get_string_filter_first_match(const bool& field_is_array); - /// Finds the next match for a filter on string field. void get_string_filter_next_match(const bool& field_is_array); @@ -295,6 +290,10 @@ private: /// Updates `validity` of the iterator to `timed_out` if condition is met. Assumes `timeout_info` is not null. inline bool is_timed_out(); + /// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during + /// this operation. + void skip_to(uint32_t id); + public: uint32_t seq_id = 0; /// Collection name -> references @@ -330,16 +329,18 @@ public: /// Recursively computes the result of each node and stores the final result in the root node. void compute_iterators(); - /// Returns a tri-state: - /// 0: id is not valid - /// 1: id is valid - /// -1: end of iterator / timed out + /// Handles moving the individual iterators to id internally and checks if `id` matches the filter. /// - /// Handles moving the individual iterators internally. - [[nodiscard]] int is_valid(uint32_t id); + /// \return + /// 0 : id is not valid + /// 1 : id is valid + /// -1: end of iterator / timed out + [[nodiscard]] int is_valid(uint32_t id, const bool& override_timeout = false); /// Advances the iterator to get the next value of doc and reference. The iterator may become invalid during this /// operation. + /// + /// Should only be called after calling `compute_iterators()` or in conjunction with `is_valid(id)` when it returns `1`. void next(); /// Collects n doc ids while advancing the iterator. The ids present in excluded_result_ids are ignored. The @@ -349,17 +350,16 @@ public: uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size, filter_result_t*& result, const bool& override_timeout = false); - /// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during - /// this operation. - void skip_to(uint32_t id, const bool& override_timeout = false); - /// Returns true if at least one id from the posting list object matches the filter. bool contains_atleast_one(const void* obj); /// Returns to the initial state of the iterator. void reset(const bool& override_timeout = false); - /// Iterates and collects all the filter ids into filter_array. + /// Copies filter ids from `filter_result` into `filter_array`. + /// + /// Should only be called after calling `compute_iterators()`. + /// /// \return size of the filter array uint32_t to_filter_id_array(uint32_t*& filter_array); @@ -383,4 +383,12 @@ public: [[nodiscard]] filter_result_iterator_t* _get_right_it() const { return right_it; } + + [[nodiscard]] uint32_t _get_equals_iterator_id() const { + return equals_iterator_id; + } + + [[nodiscard]] bool _get_is_equals_iterator_valid() const { + return is_equals_iterator_valid; + } }; diff --git a/src/filter_result_iterator.cpp b/src/filter_result_iterator.cpp index 759a2ee3..393e623a 100644 --- a/src/filter_result_iterator.cpp +++ b/src/filter_result_iterator.cpp @@ -203,20 +203,52 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r void filter_result_iterator_t::and_filter_iterators() { while (left_it->validity && right_it->validity) { - while (left_it->seq_id < right_it->seq_id) { - left_it->skip_to(right_it->seq_id); - if (!left_it->validity) { + if (left_it->seq_id < right_it->seq_id) { + auto const& left_validity = left_it->is_valid(right_it->seq_id); + + if (left_validity == 1) { + seq_id = right_it->seq_id; + + reference.clear(); + for (const auto& item: left_it->reference) { + reference[item.first] = item.second; + } + for (const auto& item: right_it->reference) { + reference[item.first] = item.second; + } + + return; + } + + if (left_validity == -1) { validity = invalid; return; } } - while (left_it->seq_id > right_it->seq_id) { - right_it->skip_to(left_it->seq_id); - if (!right_it->validity) { + if (left_it->seq_id > right_it->seq_id) { + auto const& right_validity = right_it->is_valid(left_it->seq_id); + + if (right_validity == 1) { + seq_id = left_it->seq_id; + + reference.clear(); + for (const auto& item: left_it->reference) { + reference[item.first] = item.second; + } + for (const auto& item: right_it->reference) { + reference[item.first] = item.second; + } + + return; + } + + if (right_validity == -1) { validity = invalid; return; } + + continue; } if (left_it->seq_id == right_it->seq_id) { @@ -414,10 +446,11 @@ void filter_result_iterator_t::get_string_filter_next_match(const bool& field_is } if (one_is_valid) { - seq_id = lowest_id; + equals_iterator_id = seq_id = lowest_id; } - validity = one_is_valid ? valid : invalid; + is_equals_iterator_valid = one_is_valid; + validity = one_is_valid || is_not_equals_iterator ? valid : invalid; } void filter_result_iterator_t::next() { @@ -458,7 +491,7 @@ void filter_result_iterator_t::next() { right_it->next(); } else if (left_it->seq_id == seq_id) { left_it->next(); - } else { + } else if (right_it->seq_id == seq_id) { right_it->next(); } @@ -477,6 +510,10 @@ void filter_result_iterator_t::next() { field f = index->search_schema.at(a_filter.field_name); + if (is_not_equals_iterator) { + return; + } + if (f.is_bool()) { bool_iterator.next(); if (!bool_iterator.is_valid) { @@ -487,39 +524,8 @@ void filter_result_iterator_t::next() { seq_id = bool_iterator.seq_id; return; } else if (f.is_string()) { - if (filter_node->filter_exp.apply_not_equals) { - do { - if (++seq_id >= result_index) { - uint32_t previous_match; - do { - previous_match = seq_id; - advance_string_filter_token_iterators(); - get_string_filter_next_match(f.is_array()); - } while (validity && previous_match + 1 == seq_id); - - if (!validity) { - // We've reached the end of the index, no possible matches pending. - if (previous_match >= index->seq_ids->last_id()) { - return; - } - - // (previous_match, last_doc_id] are a match for not equals. - validity = valid; - result_index = index->seq_ids->last_id() + 1; - seq_id = previous_match + 1; - } else { - result_index = seq_id; - seq_id = previous_match + 1; - } - } - all_seq_ids_iter.skip_to(seq_id); - } while (all_seq_ids_iter.valid() && all_seq_ids_iter.id() != seq_id); // Deleted id should not be considered a match. - return; - } - advance_string_filter_token_iterators(); get_string_filter_next_match(f.is_array()); - return; } } @@ -559,71 +565,6 @@ void apply_not_equals(uint32_t*&& all_ids, result_ids_len = to_include_ids_len; } -void filter_result_iterator_t::get_string_filter_first_match(const bool& field_is_array) { - get_string_filter_next_match(field_is_array); - - if (filter_node->filter_exp.apply_not_equals && index->seq_ids->num_ids() > 0) { - // filter didn't match any id. So by applying not equals, every id in the index is a match. - if (!validity) { - validity = valid; - seq_id = 0; - result_index = index->seq_ids->last_id() + 1; - - all_seq_ids_iter.skip_to(seq_id); - if (all_seq_ids_iter.valid() && all_seq_ids_iter.id() != seq_id) { // Deleted id should not be considered a match. - next(); - } - return; - } - - // [0, seq_id) are a match for not equals. - if (seq_id > 0) { - result_index = seq_id; - seq_id = 0; - - all_seq_ids_iter.skip_to(seq_id); - if (all_seq_ids_iter.valid() && all_seq_ids_iter.id() != seq_id) { // Deleted id should not be considered a match. - next(); - } - return; - } - - // Keep ignoring the consecutive matches. - uint32_t previous_match; - do { - previous_match = seq_id; - advance_string_filter_token_iterators(); - get_string_filter_next_match(field_is_array); - } while (validity && previous_match + 1 == seq_id); - - if (!validity) { - // filter matched all the ids in the index. So for not equals, there's no match. - if (previous_match >= index->seq_ids->last_id()) { - return; - } - - // (previous_match, last_doc_id] are a match for not equals. - validity = valid; - result_index = index->seq_ids->last_id() + 1; - seq_id = previous_match + 1; - - all_seq_ids_iter.skip_to(seq_id); - if (all_seq_ids_iter.valid() && all_seq_ids_iter.id() != seq_id) { // Deleted id should not be considered a match. - next(); - } - return; - } - - result_index = seq_id; - seq_id = previous_match + 1; - - all_seq_ids_iter.skip_to(seq_id); - if (all_seq_ids_iter.valid() && all_seq_ids_iter.id() != seq_id) { // Deleted id should not be considered a match. - next(); - } - } -} - void filter_result_iterator_t::init() { if (filter_node == nullptr) { return; @@ -1290,29 +1231,33 @@ void filter_result_iterator_t::init() { return; } - if (a_filter.apply_not_equals && - index->seq_ids->num_ids() - approx_filter_ids_length < string_filter_ids_threshold) { - // Since there are very few matches, and we have to apply not equals, iteration will be inefficient. - compute_iterators(); - return; - } else if (a_filter.apply_not_equals) { - all_seq_ids_iter = index->seq_ids->new_iterator(); + if (a_filter.apply_not_equals) { + auto const& num_ids = index->seq_ids->num_ids(); + approx_filter_ids_length = approx_filter_ids_length >= num_ids ? num_ids : (num_ids - approx_filter_ids_length); + + if (approx_filter_ids_length < string_filter_ids_threshold) { + // Since there are very few matches, and we have to apply not equals, iteration will be inefficient. + compute_iterators(); + return; + } else { + is_not_equals_iterator = true; + } } else if (approx_filter_ids_length < string_filter_ids_threshold) { compute_iterators(); return; } - get_string_filter_first_match(f.is_array()); + get_string_filter_next_match(f.is_array()); + if (is_not_equals_iterator) { + seq_id = 0; + last_valid_id = index->seq_ids->last_id(); + } + return; } } -void filter_result_iterator_t::skip_to(uint32_t id, const bool& override_timeout) { - if (validity == invalid || (!override_timeout && timeout_info != nullptr && is_timed_out())) { - return; - } - - // No need to traverse iterator tree if there's only one filter or compute_iterators() has been called. +void filter_result_iterator_t::skip_to(uint32_t id) { if (is_filter_result_initialized) { ArrayUtils::skip_index_to_id(result_index, filter_result.docs, filter_result.count, id); @@ -1331,20 +1276,6 @@ void filter_result_iterator_t::skip_to(uint32_t id, const bool& override_timeout return; } - if (filter_node->isOperator) { - // Skip the subtrees to id and then apply operators to arrive at the next valid doc. - left_it->skip_to(id); - right_it->skip_to(id); - - if (filter_node->filter_operator == AND) { - and_filter_iterators(); - } else { - or_filter_iterators(); - } - - return; - } - const filter a_filter = filter_node->filter_exp; if (!index->field_is_indexed(a_filter.field_name)) { @@ -1364,56 +1295,6 @@ void filter_result_iterator_t::skip_to(uint32_t id, const bool& override_timeout seq_id = bool_iterator.seq_id; return; } else if (f.is_string()) { - if (filter_node->filter_exp.apply_not_equals) { - if (id < seq_id) { - return; - } - - if (id < result_index) { - seq_id = id; - return; - } - - seq_id = result_index; - uint32_t previous_match; - - // Keep ignoring the found gaps till they cannot contain id. - do { - do { - previous_match = seq_id; - advance_string_filter_token_iterators(); - get_string_filter_next_match(f.is_array()); - } while (validity && previous_match + 1 == seq_id); - } while (validity && seq_id <= id); - - if (!validity) { - // filter matched all the ids in the index. So for not equals, there's no match. - if (previous_match >= index->seq_ids->last_id()) { - return; - } - - validity = valid; - seq_id = previous_match + 1; - result_index = index->seq_ids->last_id() + 1; - - // Skip to id, if possible. - if (seq_id < id && id < result_index) { - seq_id = id; - } - - return; - } - - result_index = seq_id; - seq_id = previous_match + 1; - - if (seq_id < id && id < result_index) { - seq_id = id; - } - - return; - } - // Skip all the token iterators and find a new match. for (auto& filter_value_tokens : posting_list_iterators) { for (auto& token: filter_value_tokens) { @@ -1431,8 +1312,8 @@ void filter_result_iterator_t::skip_to(uint32_t id, const bool& override_timeout } } -int filter_result_iterator_t::is_valid(uint32_t id) { - if (validity != valid) { +int filter_result_iterator_t::is_valid(uint32_t id, const bool& override_timeout) { + if (validity == invalid || (!override_timeout && timeout_info != nullptr && is_timed_out())) { return -1; } @@ -1442,10 +1323,6 @@ int filter_result_iterator_t::is_valid(uint32_t id) { return validity ? (seq_id == id ? 1 : 0) : -1; } - if (timeout_info != nullptr && is_timed_out()) { - return -1; - } - if (filter_node->isOperator) { // We only need to consider only valid/invalid state since child nodes can never time out. auto left_validity = left_it->is_valid(id), right_validity = right_it->is_valid(id); @@ -1458,21 +1335,19 @@ int filter_result_iterator_t::is_valid(uint32_t id) { return -1; } - // id did not match the filter but both of the sub-iterators are still valid. - // Updating seq_id to the next potential match. - if (left_validity == 0 && right_validity == 0) { - seq_id = std::max(left_it->seq_id, right_it->seq_id); - } else if (left_validity == 0) { - seq_id = left_it->seq_id; - } else { - seq_id = right_it->seq_id; - } - + seq_id = std::max(left_it->seq_id, right_it->seq_id); return 0; } seq_id = id; - and_filter_iterators(); + + reference.clear(); + for (const auto& item: left_it->reference) { + reference[item.first] = item.second; + } + for (const auto& item: right_it->reference) { + reference[item.first] = item.second; + } return 1; } else { validity = (left_it->validity == valid || right_it->validity == valid) ? valid : invalid; @@ -1480,28 +1355,63 @@ int filter_result_iterator_t::is_valid(uint32_t id) { if (left_validity < 1 && right_validity < 1) { if (left_validity == -1 && right_validity == -1) { return -1; - } - - // id did not match the filter; both of the sub-iterators or one of them might be valid. - // Updating seq_id to the next match. - if (left_validity == 0 && right_validity == 0) { - seq_id = std::min(left_it->seq_id, right_it->seq_id); - } else if (left_validity == 0) { - seq_id = left_it->seq_id; - } else { + } else if (left_validity == -1) { seq_id = right_it->seq_id; + return 0; + } else if (right_validity == -1) { + seq_id = left_it->seq_id; + return 0; } + seq_id = std::min(left_it->seq_id, right_it->seq_id); return 0; } seq_id = id; - or_filter_iterators(); + + reference.clear(); + if (left_validity == 1) { + for (const auto& item: left_it->reference) { + reference[item.first] = item.second; + } + } + if (right_validity == 1) { + for (const auto& item: right_it->reference) { + reference[item.first] = item.second; + } + } return 1; } } + if (is_not_equals_iterator) { + if (id > last_valid_id) { + validity = invalid; + return -1; + } + + validity = valid; + seq_id = id + 1; + + if (!is_equals_iterator_valid || id < equals_iterator_id) { + return 1; + } else if (id == equals_iterator_id) { + return 0; + } + } + skip_to(id); + + if (is_not_equals_iterator) { + validity = valid; + seq_id = id + 1; + + if (id == equals_iterator_id) { + return 0; + } + return 1; + } + return validity ? (seq_id == id ? 1 : 0) : -1; } @@ -1518,40 +1428,66 @@ Option filter_result_iterator_t::init_status() { } bool filter_result_iterator_t::contains_atleast_one(const void *obj) { + if (validity != valid) { + return false; + } + if(IS_COMPACT_POSTING(obj)) { compact_posting_list_t* list = COMPACT_POSTING_PTR(obj); + if (list->length == 0) { + return false; + } size_t i = 0; - while(i < list->length && validity == valid) { - size_t num_existing_offsets = list->id_offsets[i]; - size_t existing_id = list->id_offsets[i + num_existing_offsets + 1]; + size_t num_existing_offsets = list->id_offsets[i]; + size_t existing_id = list->id_offsets[i + num_existing_offsets + 1]; - if (existing_id == seq_id) { - return true; - } - - // advance smallest value + while (true) { if (existing_id < seq_id) { i += num_existing_offsets + 2; + + if (i >= list->length) { + return false; + } + + num_existing_offsets = list->id_offsets[i]; + existing_id = list->id_offsets[i + num_existing_offsets + 1]; + } else if (existing_id > seq_id) { + auto const& result = is_valid(existing_id); + + if (result == 1) { + return true; + } else if (result == -1) { + return false; + } } else { - skip_to(existing_id); + return true; } } } else { auto list = (posting_list_t*)(obj); posting_list_t::iterator_t it = list->new_iterator(); + if (!it.valid()) { + return false; + } - while(it.valid() && validity == valid) { - uint32_t id = it.id(); - - if(id == seq_id) { - return true; - } - - if(id < seq_id) { + while (true) { + if (it.id() < seq_id) { it.skip_to(seq_id); + + if (!it.valid()) { + return false; + } + } else if (it.id() > seq_id) { + auto const& result = is_valid(it.id()); + + if (result == 1) { + return true; + } else if (result == -1) { + return false; + } } else { - skip_to(id); + return true; } } } @@ -1632,37 +1568,23 @@ void filter_result_iterator_t::reset(const bool& override_timeout) { } } - if (a_filter.apply_not_equals && - (index->seq_ids->num_ids() - approx_filter_ids_length) >= string_filter_ids_threshold) { - all_seq_ids_iter = index->seq_ids->new_iterator(); + get_string_filter_next_match(f.is_array()); + if (is_not_equals_iterator) { + seq_id = 0; } - get_string_filter_first_match(f.is_array()); return; } } uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) { - if (validity != valid) { + if (!is_filter_result_initialized) { return 0; } - if (is_filter_result_initialized) { - filter_array = new uint32_t[filter_result.count]; - std::copy(filter_result.docs, filter_result.docs + filter_result.count, filter_array); - return filter_result.count; - } - - std::vector filter_ids; - do { - filter_ids.push_back(seq_id); - next(); - } while (validity == valid); - - filter_array = new uint32_t[filter_ids.size()]; - std::copy(filter_ids.begin(), filter_ids.end(), filter_array); - - return filter_ids.size(); + filter_array = new uint32_t[filter_result.count]; + std::copy(filter_result.docs, filter_result.docs + filter_result.count, filter_array); + return filter_result.count; } uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& lenA, uint32_t*& results) { @@ -1676,14 +1598,13 @@ uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& std::vector filter_ids; for (uint32_t i = 0; i < lenA; i++) { - auto result = is_valid(A[i]); - - if (result == -1) { - break; - } + auto const& id = A[i]; + auto const& result = is_valid(id); if (result == 1) { - filter_ids.push_back(A[i]); + filter_ids.push_back(id); + } else if (result == -1) { + break; } } @@ -1710,14 +1631,13 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len std::vector filter_ids; for (uint32_t i = 0; i < lenA; i++) { - auto _result = is_valid(A[i]); - - if (_result == -1) { - break; - } + auto const& id = A[i]; + auto const& _result = is_valid(id); if (_result == 1) { - filter_ids.push_back(A[i]); + filter_ids.push_back(id); + } else if (_result == -1) { + break; } } @@ -1739,12 +1659,10 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len for (uint32_t i = 0; i < lenA; i++) { auto _result = is_valid(A[i]); - if (_result == -1) { - break; - } - if (_result == 1) { match_indexes.push_back(result_index); + } else if (_result == -1) { + break; } } @@ -1987,25 +1905,15 @@ filter_result_iterator_t::filter_result_iterator_t(uint32_t* ids, const uint32_t void filter_result_iterator_t::add_phrase_ids(filter_result_iterator_t*& fit, uint32_t* phrase_result_ids, const uint32_t& phrase_result_count) { + fit->reset(); + auto root_iterator = new filter_result_iterator_t(std::min(phrase_result_count, fit->approx_filter_ids_length)); root_iterator->left_it = new filter_result_iterator_t(phrase_result_ids, phrase_result_count); root_iterator->right_it = fit; - - auto& left_it = root_iterator->left_it; - auto& right_it = root_iterator->right_it; - - while (left_it->validity && right_it->validity && left_it->seq_id != right_it->seq_id) { - if (left_it->seq_id < right_it->seq_id) { - left_it->skip_to(right_it->seq_id); - } else { - right_it->skip_to(left_it->seq_id); - } - } - root_iterator->timeout_info = std::move(fit->timeout_info); - root_iterator->validity = (left_it->validity == timed_out || right_it->validity == timed_out) ? timed_out : - (left_it->validity == invalid || right_it->validity == invalid) ? invalid : valid; - root_iterator->seq_id = left_it->seq_id; + + root_iterator->and_filter_iterators(); + fit = root_iterator; } diff --git a/src/index.cpp b/src/index.cpp index 758770e5..66873557 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -2927,34 +2927,36 @@ Option Index::search(std::vector& field_query_tokens, cons std::vector> dist_results; - uint32_t filter_id_count = 0; - while (!no_filters_provided && - filter_id_count < vector_query.flat_search_cutoff && filter_result_iterator->validity == filter_result_iterator_t::valid) { - auto& seq_id = filter_result_iterator->seq_id; - auto filter_result = single_filter_result_t(seq_id, std::move(filter_result_iterator->reference)); - filter_result_iterator->next(); - std::vector values; + filter_result_iterator->compute_iterators(); - try { - values = field_vector_index->vecdex->getDataByLabel(seq_id); - } catch(...) { - // likely not found - continue; + uint32_t filter_id_count = filter_result_iterator->approx_filter_ids_length; + if (!no_filters_provided && filter_id_count < vector_query.flat_search_cutoff) { + while (filter_result_iterator->validity == filter_result_iterator_t::valid) { + auto &seq_id = filter_result_iterator->seq_id; + auto filter_result = single_filter_result_t(seq_id, std::move(filter_result_iterator->reference)); + filter_result_iterator->next(); + std::vector values; + + try { + values = field_vector_index->vecdex->getDataByLabel(seq_id); + } catch (...) { + // likely not found + continue; + } + + float dist; + if (field_vector_index->distance_type == cosine) { + std::vector normalized_q(vector_query.values.size()); + hnsw_index_t::normalize_vector(vector_query.values, normalized_q); + dist = field_vector_index->space->get_dist_func()(normalized_q.data(), values.data(), + &field_vector_index->num_dim); + } else { + dist = field_vector_index->space->get_dist_func()(vector_query.values.data(), values.data(), + &field_vector_index->num_dim); + } + + dist_results.emplace_back(dist, filter_result); } - - float dist; - if(field_vector_index->distance_type == cosine) { - std::vector normalized_q(vector_query.values.size()); - hnsw_index_t::normalize_vector(vector_query.values, normalized_q); - dist = field_vector_index->space->get_dist_func()(normalized_q.data(), values.data(), - &field_vector_index->num_dim); - } else { - dist = field_vector_index->space->get_dist_func()(vector_query.values.data(), values.data(), - &field_vector_index->num_dim); - } - - dist_results.emplace_back(dist, filter_result); - filter_id_count++; } filter_result_iterator->reset(); search_cutoff = search_cutoff || filter_result_iterator->validity == filter_result_iterator_t::timed_out; @@ -2989,9 +2991,12 @@ Option Index::search(std::vector& field_query_tokens, cons search_cutoff = true; } - // The doc_id must be valid otherwise it would've been filtered out upstream. - filter_result_iterator->skip_to(pair.second, search_cutoff); - auto filter_result = single_filter_result_t(pair.second, + auto const& seq_id = pair.second; + if (filter_result_iterator->is_valid(seq_id, search_cutoff) != 1) { + continue; + } + // The seq_id must be valid otherwise it would've been filtered out upstream. + auto filter_result = single_filter_result_t(seq_id, std::move(filter_result_iterator->reference)); dist_results.emplace_back(pair.first, filter_result); } @@ -3447,7 +3452,9 @@ Option Index::search(std::vector& field_query_tokens, cons auto& vec_result = vec_results[res_index]; auto seq_id = vec_result.first; - filter_result_iterator->skip_to(seq_id); + if (!no_filters_provided && filter_result_iterator->is_valid(seq_id) != 1) { + continue; + } auto references = std::move(filter_result_iterator->reference); filter_result_iterator->reset(); @@ -5404,6 +5411,7 @@ Option Index::do_phrase_search(const size_t num_search_fields, const std:: return Option(true); } + filter_result_iterator->compute_iterators(); all_result_ids_len = filter_result_iterator->to_filter_id_array(all_result_ids); filter_result_iterator->reset(); @@ -6147,6 +6155,8 @@ void Index::populate_sort_mapping(int* sort_order, std::vector& geopoint if (!filter_init_op.ok()) { return; } + + filter_result_iterator.compute_iterators(); uint32_t* eval_ids = nullptr; auto eval_ids_count = filter_result_iterator.to_filter_id_array(eval_ids); diff --git a/src/or_iterator.cpp b/src/or_iterator.cpp index b209e33f..51b63704 100644 --- a/src/or_iterator.cpp +++ b/src/or_iterator.cpp @@ -209,12 +209,7 @@ bool or_iterator_t::take_id(result_iter_state_t& istate, uint32_t id, bool& is_e } if (istate.fit != nullptr && istate.fit->approx_filter_ids_length > 0) { - if (istate.fit->is_valid(id) == 1) { - istate.fit->next(); - return true; - } - - return false; + return istate.fit->is_valid(id) == 1; } return true; @@ -264,6 +259,7 @@ bool or_iterator_t::take_id(result_iter_state_t& istate, uint32_t id, bool& is_e if (istate.fit->is_valid(id) == 1) { filter_result.seq_id = id; filter_result.reference_filter_results = std::move(istate.fit->reference); + istate.fit->next(); return true; } diff --git a/test/filter_test.cpp b/test/filter_test.cpp index f65a6c3b..e5479101 100644 --- a/test/filter_test.cpp +++ b/test/filter_test.cpp @@ -181,51 +181,6 @@ TEST_F(FilterTest, FilterTreeIterator) { ASSERT_EQ(filter_result_iterator_t::invalid, iter_not_equals_test.validity); - delete filter_tree_root; - filter_tree_root = nullptr; - filter_op = filter::parse_filter_query("tags: gold", coll->get_schema(), store, doc_id_prefix, - filter_tree_root); - ASSERT_TRUE(filter_op.ok()); - - auto iter_skip_test1 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); - ASSERT_TRUE(iter_skip_test1.init_status().ok()); - - ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test1.validity); - iter_skip_test1.skip_to(3); - ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test1.validity); - ASSERT_EQ(4, iter_skip_test1.seq_id); - iter_skip_test1.next(); - - ASSERT_EQ(filter_result_iterator_t::invalid, iter_skip_test1.validity); - - delete filter_tree_root; - filter_tree_root = nullptr; - filter_op = filter::parse_filter_query("tags: != silver", coll->get_schema(), store, doc_id_prefix, - filter_tree_root); - ASSERT_TRUE(filter_op.ok()); - - auto iter_skip_test2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); - ASSERT_TRUE(iter_skip_test2.init_status().ok()); - - ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test2.validity); - iter_skip_test2.skip_to(3); - ASSERT_EQ(filter_result_iterator_t::invalid, iter_skip_test2.validity); - - delete filter_tree_root; - filter_tree_root = nullptr; - filter_op = filter::parse_filter_query("name: jeremy && tags: fine platinum", coll->get_schema(), store, doc_id_prefix, - filter_tree_root); - ASSERT_TRUE(filter_op.ok()); - - auto iter_and_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); - ASSERT_TRUE(iter_and_test.init_status().ok()); - - ASSERT_EQ(filter_result_iterator_t::valid, iter_and_test.validity); - ASSERT_EQ(1, iter_and_test.seq_id); - iter_and_test.next(); - - ASSERT_EQ(filter_result_iterator_t::invalid, iter_and_test.validity); - delete filter_tree_root; filter_tree_root = nullptr; filter_op = filter::parse_filter_query("name: James || tags: bronze", coll->get_schema(), store, doc_id_prefix, @@ -261,20 +216,21 @@ TEST_F(FilterTest, FilterTreeIterator) { filter_tree_root); ASSERT_TRUE(filter_op.ok()); - auto iter_skip_complex_filter_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); - ASSERT_TRUE(iter_skip_complex_filter_test.init_status().ok()); + auto iter_complex_filter_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); + ASSERT_TRUE(iter_complex_filter_test.init_status().ok()); - ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_complex_filter_test.validity); - iter_skip_complex_filter_test.skip_to(4); + ASSERT_EQ(filter_result_iterator_t::valid, iter_complex_filter_test.validity); + ASSERT_EQ(0, iter_complex_filter_test.is_valid(3)); + ASSERT_EQ(4, iter_complex_filter_test.seq_id); expected = {4, 5}; for (auto const& i : expected) { - ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_complex_filter_test.validity); - ASSERT_EQ(i, iter_skip_complex_filter_test.seq_id); - iter_skip_complex_filter_test.next(); + ASSERT_EQ(filter_result_iterator_t::valid, iter_complex_filter_test.validity); + ASSERT_EQ(i, iter_complex_filter_test.seq_id); + iter_complex_filter_test.next(); } - ASSERT_EQ(filter_result_iterator_t::invalid, iter_skip_complex_filter_test.validity); + ASSERT_EQ(filter_result_iterator_t::invalid, iter_complex_filter_test.validity); delete filter_tree_root; filter_tree_root = nullptr; @@ -285,7 +241,8 @@ TEST_F(FilterTest, FilterTreeIterator) { auto iter_validate_ids_test1 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); ASSERT_TRUE(iter_validate_ids_test1.init_status().ok()); - std::vector validate_ids = {0, 1, 2, 3, 4, 5, 6}, seq_ids = {0, 2, 2, 4, 4, 5, 5}; + std::vector validate_ids = {0, 1, 2, 3, 4, 5, 6}; + std::vector seq_ids = {0, 2, 2, 4, 4, 5, 5}; expected = {1, 0, 1, 0, 1, 1, -1}; for (uint32_t i = 0; i < validate_ids.size(); i++) { ASSERT_EQ(expected[i], iter_validate_ids_test1.is_valid(validate_ids[i])); @@ -324,24 +281,6 @@ TEST_F(FilterTest, FilterTreeIterator) { ASSERT_EQ(seq_ids[i], iter_validate_ids_test3.seq_id); } - delete filter_tree_root; - filter_tree_root = nullptr; - filter_op = filter::parse_filter_query("name: James || tags: != gold", coll->get_schema(), store, doc_id_prefix, - filter_tree_root); - ASSERT_TRUE(filter_op.ok()); - - auto iter_validate_ids_not_equals_filter_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), - filter_tree_root); - ASSERT_TRUE(iter_validate_ids_not_equals_filter_test.init_status().ok()); - - validate_ids = {0, 1, 2, 3, 4, 5, 6}; - seq_ids = {1, 1, 3, 3, 5, 5, 5}; - expected = {0, 1, 0, 1, 0, 1, -1}; - for (uint32_t i = 0; i < validate_ids.size(); i++) { - ASSERT_EQ(expected[i], iter_validate_ids_not_equals_filter_test.is_valid(validate_ids[i])); - ASSERT_EQ(seq_ids[i], iter_validate_ids_not_equals_filter_test.seq_id); - } - delete filter_tree_root; filter_tree_root = nullptr; filter_op = filter::parse_filter_query("tags: gold", coll->get_schema(), store, doc_id_prefix, @@ -377,7 +316,7 @@ TEST_F(FilterTest, FilterTreeIterator) { ASSERT_TRUE(iter_plist_contains_atleast_one_test1.init_status().ok()); posting_list_t p_list1(2); - ids = {1, 3, 5}; + ids = {1, 3}; for (const auto &i: ids) { p_list1.upsert(i, {1, 2, 3}); } @@ -447,6 +386,7 @@ TEST_F(FilterTest, FilterTreeIterator) { uint32_t* filter_ids = nullptr; uint32_t filter_ids_length; + iter_to_array_test.compute_iterators(); filter_ids_length = iter_to_array_test.to_filter_id_array(filter_ids); ASSERT_EQ(3, filter_ids_length); @@ -454,7 +394,6 @@ TEST_F(FilterTest, FilterTreeIterator) { for (uint32_t i = 0; i < filter_ids_length; i++) { ASSERT_EQ(expected[i], filter_ids[i]); } - ASSERT_EQ(filter_result_iterator_t::invalid, iter_to_array_test.validity); delete[] filter_ids; @@ -487,33 +426,8 @@ TEST_F(FilterTest, FilterTreeIterator) { ASSERT_TRUE(add_op.ok()); filter_tree_root = nullptr; - filter_op = filter::parse_filter_query("tags: != FINE PLATINUM", coll->get_schema(), store, doc_id_prefix, + filter_op = filter::parse_filter_query("tags: bronze", coll->get_schema(), store, doc_id_prefix, filter_tree_root); - ASSERT_TRUE(filter_op.ok()); - - auto iter_skip_test3 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); - ASSERT_TRUE(iter_skip_test3.init_status().ok()); - - ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test3.validity); - iter_skip_test3.skip_to(4); - ASSERT_EQ(4, iter_skip_test3.seq_id); - - ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test3.validity); - - delete filter_tree_root; - - filter_tree_root = nullptr; - filter_op = filter::parse_filter_query("tags: != gold", coll->get_schema(), store, doc_id_prefix, - filter_tree_root); - ASSERT_TRUE(filter_op.ok()); - - auto iter_skip_test4 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); - ASSERT_TRUE(iter_skip_test4.init_status().ok()); - - ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test4.validity); - iter_skip_test4.skip_to(6); - ASSERT_EQ(6, iter_skip_test4.seq_id); - ASSERT_EQ(filter_result_iterator_t::valid, iter_skip_test4.validity); auto iter_add_phrase_ids_test = new filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); std::unique_ptr filter_iter_guard(iter_add_phrase_ids_test); @@ -528,7 +442,7 @@ TEST_F(FilterTest, FilterTreeIterator) { filter_iter_guard.reset(iter_add_phrase_ids_test); ASSERT_EQ(filter_result_iterator_t::valid, iter_add_phrase_ids_test->validity); - ASSERT_EQ(6, iter_add_phrase_ids_test->seq_id); + ASSERT_EQ(2, iter_add_phrase_ids_test->seq_id); delete filter_tree_root; filter_tree_root = nullptr; @@ -605,26 +519,6 @@ TEST_F(FilterTest, FilterTreeIterator) { ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_equals_test_2.validity); delete filter_tree_root; - - filter_tree_root = nullptr; - filter_op = filter::parse_filter_query("tags: != gold", coll->get_schema(), store, doc_id_prefix, - filter_tree_root); - ASSERT_TRUE(filter_op.ok()); - - auto iter_string_not_equals_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); - ASSERT_TRUE(iter_string_not_equals_test.init_status().ok()); - ASSERT_FALSE(iter_string_not_equals_test._get_is_filter_result_initialized()); - - expected = {1, 3, 5, 6}; - for (auto const& i : expected) { - ASSERT_EQ(filter_result_iterator_t::valid, iter_string_not_equals_test.validity); - ASSERT_EQ(i, iter_string_not_equals_test.seq_id); - iter_string_not_equals_test.next(); - } - ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_not_equals_test.validity); - - delete filter_tree_root; - filter_tree_root = nullptr; filter_op = filter::parse_filter_query("tags: != [gold, silver]", coll->get_schema(), store, doc_id_prefix, filter_tree_root); @@ -656,34 +550,6 @@ TEST_F(FilterTest, FilterTreeIterator) { delete filter_tree_root; - filter_tree_root = nullptr; - filter_op = filter::parse_filter_query("name: != James Rowdy", coll->get_schema(), store, doc_id_prefix, - filter_tree_root); - ASSERT_TRUE(filter_op.ok()); - - auto iter_string_not_equals_test_3 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); - ASSERT_TRUE(iter_string_not_equals_test_3.init_status().ok()); - ASSERT_FALSE(iter_string_not_equals_test_3._get_is_filter_result_initialized()); - - expected = {1, 3, 4}; - for (auto const& i : expected) { - ASSERT_EQ(filter_result_iterator_t::valid, iter_string_not_equals_test_3.validity); - ASSERT_EQ(i, iter_string_not_equals_test_3.seq_id); - iter_string_not_equals_test_3.next(); - } - ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_not_equals_test_3.validity); - - iter_string_not_equals_test_3.reset(); - - expected = {1, 3, 4}; - for (auto const& i : expected) { - ASSERT_EQ(filter_result_iterator_t::valid, iter_string_not_equals_test_3.validity); - ASSERT_EQ(i, iter_string_not_equals_test_3.seq_id); - iter_string_not_equals_test_3.next(); - } - ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_not_equals_test_3.validity); - delete filter_tree_root; - Collection *bool_coll; std::vector fields = {field("title", field_types::STRING, false), @@ -753,7 +619,7 @@ TEST_F(FilterTest, FilterTreeIterator) { ASSERT_EQ(filter_result_iterator_t::invalid, iter_boolean_test_2.validity); iter_boolean_test_2.reset(); - iter_boolean_test_2.skip_to(6); + ASSERT_EQ(0, iter_boolean_test_2.is_valid(6)); ASSERT_EQ(filter_result_iterator_t::valid, iter_boolean_test_2.validity); ASSERT_EQ(7, iter_boolean_test_2.seq_id); @@ -803,13 +669,23 @@ TEST_F(FilterTest, FilterTreeIterator) { auto iter_string_prefix_value_test_2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); ASSERT_TRUE(iter_string_prefix_value_test_2.init_status().ok()); ASSERT_FALSE(iter_string_prefix_value_test_2._get_is_filter_result_initialized()); - ASSERT_EQ(3, iter_string_prefix_value_test_2.approx_filter_ids_length); // document 0 and 2 have been deleted. + ASSERT_EQ(4, iter_string_prefix_value_test_2.approx_filter_ids_length); // 7 total docs, 3 approx count for equals. - expected = {1, 3, 5, 6, 7}; - for (auto const& i : expected) { + validate_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + seq_ids = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9}; + expected = {1, 1, 1, 1, 0, 1, 1, 1, 0, -1}; + std::vector equals_match_seq_ids = {4, 4, 4, 4, 4, 8, 8, 8, 8, 8}; + std::vector equals_iterator_valid = {true, true, true, true, true, true, true, true, true, true}; + for (uint32_t i = 0; i < validate_ids.size(); i++) { ASSERT_EQ(filter_result_iterator_t::valid, iter_string_prefix_value_test_2.validity); - ASSERT_EQ(i, iter_string_prefix_value_test_2.seq_id); - iter_string_prefix_value_test_2.next(); + ASSERT_EQ(expected[i], iter_string_prefix_value_test_2.is_valid(validate_ids[i])); + ASSERT_EQ(equals_match_seq_ids[i], iter_string_prefix_value_test_2._get_equals_iterator_id()); + ASSERT_EQ(equals_iterator_valid[i], iter_string_prefix_value_test_2._get_is_equals_iterator_valid()); + + if (expected[i] == 1) { + iter_string_prefix_value_test_2.next(); + } + ASSERT_EQ(seq_ids[i], iter_string_prefix_value_test_2.seq_id); } ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_prefix_value_test_2.validity); @@ -938,3 +814,263 @@ TEST_F(FilterTest, FilterTreeInitialization) { delete filter_tree_root; filter_tree_root = nullptr; } + +TEST_F(FilterTest, NotEqualsStringFilter) { + nlohmann::json schema = + R"({ + "name": "Collection", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "tags", "type": "string[]"} + ] + })"_json; + + Collection* coll = collectionManager.create_collection(schema).get(); + + std::ifstream infile(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl"); + std::string json_line; + while (std::getline(infile, json_line)) { + auto add_op = coll->add(json_line); + ASSERT_TRUE(add_op.ok()); + } + infile.close(); + + const std::string doc_id_prefix = std::to_string(coll->get_collection_id()) + "_" + Collection::DOC_ID_PREFIX + "_"; + filter_node_t* filter_tree_root = nullptr; + + Option filter_op = filter::parse_filter_query("tags:!= gold", coll->get_schema(), store, doc_id_prefix, + filter_tree_root); + ASSERT_TRUE(filter_op.ok()); + + auto computed_not_equals_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); + ASSERT_TRUE(computed_not_equals_test.init_status().ok()); + ASSERT_TRUE(computed_not_equals_test._get_is_filter_result_initialized()); + + std::vector expected = {1, 3}; + for (auto const& i : expected) { + ASSERT_EQ(filter_result_iterator_t::valid, computed_not_equals_test.validity); + ASSERT_EQ(i, computed_not_equals_test.seq_id); + computed_not_equals_test.next(); + } + ASSERT_EQ(filter_result_iterator_t::invalid, computed_not_equals_test.validity); + + delete filter_tree_root; + filter_tree_root = nullptr; + filter_op = filter::parse_filter_query("tags: != fine platinum", coll->get_schema(), store, doc_id_prefix, + filter_tree_root); + ASSERT_TRUE(filter_op.ok()); + + auto iter_string_not_equals_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); + ASSERT_TRUE(iter_string_not_equals_test.init_status().ok()); + ASSERT_FALSE(iter_string_not_equals_test._get_is_filter_result_initialized()); + + std::vector validate_ids = {0, 1, 2, 3, 4, 5}; + std::vector seq_ids = {1, 2, 3, 4, 5, 5}; + std::vector equals_match_seq_ids = {1, 1, 1, 1, 1, 1}; + std::vector equals_iterator_valid = {true, true, false, false, false, false}; + expected = {1, 0, 1, 1, 1, -1}; + for (uint32_t i = 0; i < validate_ids.size(); i++) { + ASSERT_EQ(filter_result_iterator_t::valid, iter_string_not_equals_test.validity); + ASSERT_EQ(expected[i], iter_string_not_equals_test.is_valid(validate_ids[i])); + ASSERT_EQ(equals_match_seq_ids[i], iter_string_not_equals_test._get_equals_iterator_id()); + ASSERT_EQ(equals_iterator_valid[i], iter_string_not_equals_test._get_is_equals_iterator_valid()); + + if (expected[i] == 1) { + iter_string_not_equals_test.next(); + } + ASSERT_EQ(seq_ids[i], iter_string_not_equals_test.seq_id); + } + ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_not_equals_test.validity); + + delete filter_tree_root; + filter_tree_root = nullptr; + filter_op = filter::parse_filter_query("tags: != [gold, silver]", coll->get_schema(), store, doc_id_prefix, + filter_tree_root); + ASSERT_TRUE(filter_op.ok()); + auto iter_string_array_not_equals_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); + ASSERT_TRUE(iter_string_array_not_equals_test.init_status().ok()); + ASSERT_FALSE(iter_string_array_not_equals_test._get_is_filter_result_initialized()); + ASSERT_EQ(5, iter_string_array_not_equals_test.approx_filter_ids_length); + + validate_ids = {0, 1, 2, 3, 4, 5}; + seq_ids = {1, 2, 3, 4, 5, 5}; + expected = {0, 1, 0, 0, 0, -1}; + for (uint32_t i = 0; i < validate_ids.size(); i++) { + ASSERT_EQ(filter_result_iterator_t::valid, iter_string_array_not_equals_test.validity); + ASSERT_EQ(expected[i], iter_string_array_not_equals_test.is_valid(validate_ids[i])); + + if (expected[i] == 1) { + iter_string_array_not_equals_test.next(); + } + ASSERT_EQ(seq_ids[i], iter_string_array_not_equals_test.seq_id); + } + ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_array_not_equals_test.validity); + + delete filter_tree_root; + filter_tree_root = nullptr; + + auto docs = { + R"({ + "name": "James Rowdy", + "tags": ["copper"] + })"_json, + R"({ + "name": "James Rowdy", + "tags": ["copper"] + })"_json, + R"({ + "name": "James Rowdy", + "tags": ["gold"] + })"_json + }; + + for (auto const& doc: docs) { + auto add_op = coll->add(doc.dump()); + ASSERT_TRUE(add_op.ok()); + } + + filter_op = filter::parse_filter_query("tags: != gold", coll->get_schema(), store, doc_id_prefix, + filter_tree_root); + ASSERT_TRUE(filter_op.ok()); + + auto iter_string_not_equals_test_2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); + ASSERT_TRUE(iter_string_not_equals_test_2.init_status().ok()); + ASSERT_FALSE(iter_string_not_equals_test_2._get_is_filter_result_initialized()); + + validate_ids = {1, 2, 3, 4, 5, 6, 7, 8}; + seq_ids = {2, 3, 4, 5, 6, 7, 8, 8}; + expected = {1, 0, 1, 0, 1, 1, 0, -1}; + equals_match_seq_ids = {2, 2, 4, 4, 7, 7, 7, 7}; + equals_iterator_valid = {true, true, true, true, true, true, true, true}; + for (uint32_t i = 0; i < validate_ids.size(); i++) { + ASSERT_EQ(filter_result_iterator_t::valid, iter_string_not_equals_test_2.validity); + ASSERT_EQ(expected[i], iter_string_not_equals_test_2.is_valid(validate_ids[i])); + ASSERT_EQ(equals_match_seq_ids[i], iter_string_not_equals_test_2._get_equals_iterator_id()); + ASSERT_EQ(equals_iterator_valid[i], iter_string_not_equals_test_2._get_is_equals_iterator_valid()); + + if (expected[i] == 1) { + iter_string_not_equals_test_2.next(); + } + ASSERT_EQ(seq_ids[i], iter_string_not_equals_test_2.seq_id); + } + ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_not_equals_test_2.validity); + + iter_string_not_equals_test_2.reset(); + validate_ids = {2, 5, 7, 8}; + seq_ids = {3, 6, 8, 8}; + expected = {0, 1, 0, -1}; + equals_match_seq_ids = {2, 7, 7, 7}; + equals_iterator_valid = {true, true, true, true}; + for (uint32_t i = 0; i < validate_ids.size(); i++) { + ASSERT_EQ(filter_result_iterator_t::valid, iter_string_not_equals_test_2.validity); + ASSERT_EQ(expected[i], iter_string_not_equals_test_2.is_valid(validate_ids[i])); + ASSERT_EQ(equals_match_seq_ids[i], iter_string_not_equals_test_2._get_equals_iterator_id()); + ASSERT_EQ(equals_iterator_valid[i], iter_string_not_equals_test_2._get_is_equals_iterator_valid()); + + if (expected[i] == 1) { + iter_string_not_equals_test_2.next(); + } + ASSERT_EQ(seq_ids[i], iter_string_not_equals_test_2.seq_id); + } + ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_not_equals_test_2.validity); + + delete filter_tree_root; + filter_tree_root = nullptr; + + filter_op = filter::parse_filter_query("name: James || tags: != bronze", coll->get_schema(), store, doc_id_prefix, + filter_tree_root); + ASSERT_TRUE(filter_op.ok()); + + auto iter_not_equals_or_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), + filter_tree_root); + ASSERT_TRUE(iter_not_equals_or_test.init_status().ok()); + ASSERT_FALSE(iter_not_equals_or_test._get_is_filter_result_initialized()); + + validate_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + seq_ids = {1, 2, 3, 4, 5, 6, 7, 8, 8}; + expected = {1, 1, 0, 1, 0, 1, 1, 1, -1}; + for (uint32_t i = 0; i < validate_ids.size(); i++) { + ASSERT_EQ(filter_result_iterator_t::valid, iter_not_equals_or_test.validity); + ASSERT_EQ(expected[i], iter_not_equals_or_test.is_valid(validate_ids[i])); + + if (expected[i] == 1) { + iter_not_equals_or_test.next(); + } + ASSERT_EQ(seq_ids[i], iter_not_equals_or_test.seq_id); + } + ASSERT_EQ(filter_result_iterator_t::invalid, iter_not_equals_or_test.validity); + + delete filter_tree_root; + filter_tree_root = nullptr; + filter_op = filter::parse_filter_query("tags: != silver || tags: != gold", coll->get_schema(), store, doc_id_prefix, + filter_tree_root); + ASSERT_TRUE(filter_op.ok()); + + auto iter_not_equals_or_test_2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), + filter_tree_root); + ASSERT_TRUE(iter_not_equals_or_test_2.init_status().ok()); + + validate_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + seq_ids = {1, 2, 3, 4, 5, 6, 7, 8, 8}; + expected = {0, 1, 1, 1, 0, 1, 1, 1, -1}; + for (uint32_t i = 0; i < validate_ids.size(); i++) { + ASSERT_EQ(filter_result_iterator_t::valid, iter_not_equals_or_test_2.validity); + ASSERT_EQ(expected[i], iter_not_equals_or_test_2.is_valid(validate_ids[i])); + + if (expected[i] == 1) { + iter_not_equals_or_test_2.next(); + } + ASSERT_EQ(seq_ids[i], iter_not_equals_or_test_2.seq_id); + } + ASSERT_EQ(filter_result_iterator_t::invalid, iter_not_equals_or_test_2.validity); + + delete filter_tree_root; + filter_tree_root = nullptr; + filter_op = filter::parse_filter_query("name: James && tags: != gold", coll->get_schema(), store, doc_id_prefix, + filter_tree_root); + ASSERT_TRUE(filter_op.ok()); + + auto iter_not_equals_and_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), + filter_tree_root); + ASSERT_TRUE(iter_not_equals_and_test.init_status().ok()); + + validate_ids = {5, 6, 7, 8}; + seq_ids = {6, 7, 8, 8}; + expected = {1, 1, 0, -1}; + for (uint32_t i = 0; i < validate_ids.size(); i++) { + ASSERT_EQ(filter_result_iterator_t::valid, iter_not_equals_and_test.validity); + ASSERT_EQ(expected[i], iter_not_equals_and_test.is_valid(validate_ids[i])); + + if (expected[i] == 1) { + iter_not_equals_and_test.next(); + } + ASSERT_EQ(seq_ids[i], iter_not_equals_and_test.seq_id); + } + ASSERT_EQ(filter_result_iterator_t::invalid, iter_not_equals_and_test.validity); + + delete filter_tree_root; + filter_tree_root = nullptr; + filter_op = filter::parse_filter_query("tags: != silver && tags: != gold", coll->get_schema(), store, doc_id_prefix, + filter_tree_root); + ASSERT_TRUE(filter_op.ok()); + + auto iter_not_equals_and_test_2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), + filter_tree_root); + ASSERT_TRUE(iter_not_equals_and_test_2.init_status().ok()); + + validate_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + seq_ids = {1, 2, 3, 4, 5, 6, 7, 8, 8}; + expected = {0, 1, 0, 0, 0, 1, 1, 0, -1}; + for (uint32_t i = 0; i < validate_ids.size(); i++) { + ASSERT_EQ(filter_result_iterator_t::valid, iter_not_equals_and_test_2.validity); + ASSERT_EQ(expected[i], iter_not_equals_and_test_2.is_valid(validate_ids[i])); + + if (expected[i] == 1) { + iter_not_equals_and_test_2.next(); + } + ASSERT_EQ(seq_ids[i], iter_not_equals_and_test_2.seq_id); + } + ASSERT_EQ(filter_result_iterator_t::invalid, iter_not_equals_and_test_2.validity); + + delete filter_tree_root; +}