From 98965418745547b6f390f62dac475bd3f426d359 Mon Sep 17 00:00:00 2001 From: Harpreet Sangar Date: Thu, 20 Apr 2023 13:19:42 +0530 Subject: [PATCH] Add `filter_result_iterator_t::get_n_ids`. Use `is_valid` instead of `valid()`. Handle special `_all_` field name in filtering logic. --- include/filter_result_iterator.h | 28 ++-- include/index.h | 1 + src/art.cpp | 3 +- src/filter.cpp | 3 + src/filter_result_iterator.cpp | 256 +++++++++++++++++++------------ src/index.cpp | 47 +++--- test/filter_test.cpp | 64 ++++---- 7 files changed, 227 insertions(+), 175 deletions(-) diff --git a/include/filter_result_iterator.h b/include/filter_result_iterator.h index bd9e66f0..1184b74a 100644 --- a/include/filter_result_iterator.h +++ b/include/filter_result_iterator.h @@ -99,6 +99,7 @@ private: /// Stores the result of the filters that cannot be iterated. filter_result_t filter_result; + bool is_filter_result_initialized = false; /// Initialized in case of filter on string field. /// Sample filter values: ["foo bar", "baz"]. Each filter value is split into tokens. We get posting list iterator @@ -108,9 +109,6 @@ private: std::vector> posting_list_iterators; std::vector expanded_plists; - /// Set to false when this iterator or it's subtree becomes invalid. - bool is_valid = true; - /// Initializes the state of iterator node after it's creation. void init(); @@ -126,18 +124,18 @@ private: /// Finds the next match for a filter on string field. void doc_matching_string_filter(bool field_is_array); + /// Returns true when doc and reference hold valid values. Used in conjunction with next() and skip_to(id). + [[nodiscard]] bool valid(); + public: - uint32_t* get_ids() { - return filter_result.docs; - } - - uint32_t get_length() { - return filter_result.count; - } - uint32_t seq_id = 0; /// Collection name -> references std::map reference; + + /// Set to false when this iterator or it's subtree becomes invalid. + bool is_valid = true; + + /// Initialization status of the iterator. Option status = Option(true); /// Holds the upper-bound of the number of seq ids this iterator would match. @@ -156,9 +154,6 @@ public: /// Returns the status of the initialization of iterator tree. Option init_status(); - /// Returns true when doc and reference hold valid values. Used in conjunction with next() and skip_to(id). - [[nodiscard]] bool valid(); - /// Returns a tri-state: /// 0: id is not valid /// 1: id is valid @@ -171,6 +166,9 @@ public: /// operation. void next(); + /// Collects n doc ids while advancing the iterator. The iterator may become invalid during this operation. + void get_n_ids(const uint32_t& n, std::vector& results); + /// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during /// this operation. void skip_to(uint32_t id); @@ -188,6 +186,4 @@ public: /// Performs AND with the contents of A and allocates a new array of results. /// \return size of the results array uint32_t and_scalar(const uint32_t* A, const uint32_t& lenA, uint32_t*& results); - - bool can_get_ids(); }; diff --git a/include/index.h b/include/index.h index 56afb348..d2073474 100644 --- a/include/index.h +++ b/include/index.h @@ -563,6 +563,7 @@ public: static const int DROP_TOKENS_THRESHOLD = 1; // "_all_" is a special field that maps to all the ids in the index. + static constexpr const char* SEQ_IDS_FIELD = "_all_"; static constexpr const char* SEQ_IDS_FILTER = "_all_: 1"; Index() = delete; diff --git a/src/art.cpp b/src/art.cpp index d189d7eb..7a7d5d5b 100644 --- a/src/art.cpp +++ b/src/art.cpp @@ -991,7 +991,7 @@ const uint32_t* get_allowed_doc_ids(art_tree *t, const std::string& prev_token, std::vector prev_leaf_ids; posting_t::merge({prev_leaf->values}, prev_leaf_ids); - if(filter_result_iterator.valid()) { + if(filter_result_iterator.is_valid) { prev_token_doc_ids_len = filter_result_iterator.and_scalar(prev_leaf_ids.data(), prev_leaf_ids.size(), prev_token_doc_ids); } else { @@ -1692,6 +1692,7 @@ int art_fuzzy_search_i(art_tree *t, const unsigned char *term, const int term_le // documents that contain the previous token and/or filter ids size_t allowed_doc_ids_len = 0; const uint32_t* allowed_doc_ids = get_allowed_doc_ids(t, prev_token, filter_result_iterator, allowed_doc_ids_len); + filter_result_iterator.reset(); for(auto node: nodes) { art_topk_iter(node, token_order, max_words, exact_leaf, diff --git a/src/filter.cpp b/src/filter.cpp index 95fbfefc..18348ed6 100644 --- a/src/filter.cpp +++ b/src/filter.cpp @@ -283,6 +283,9 @@ Option toFilter(const std::string expression, } } return Option(true); + } else if (field_name == Index::SEQ_IDS_FIELD) { + filter_exp = {field_name, {}, {}}; + return Option(true); } auto field_it = search_schema.find(field_name); diff --git a/src/filter_result_iterator.cpp b/src/filter_result_iterator.cpp index 787ad578..c5098581 100644 --- a/src/filter_result_iterator.cpp +++ b/src/filter_result_iterator.cpp @@ -271,8 +271,12 @@ void filter_result_iterator_t::advance_string_filter_token_iterators() { for (uint32_t i = 0; i < posting_list_iterators.size(); i++) { auto& filter_value_tokens = posting_list_iterators[i]; - if (filter_value_tokens[0].valid() && filter_value_tokens[0].id() == seq_id) { - for (auto& iter: filter_value_tokens) { + if (!filter_value_tokens[0].valid() || filter_value_tokens[0].id() != seq_id) { + continue; + } + + for (auto& iter: filter_value_tokens) { + if (iter.valid()) { iter.next(); } } @@ -362,10 +366,7 @@ void filter_result_iterator_t::next() { return; } - const filter a_filter = filter_node->filter_exp; - - bool is_referenced_filter = !a_filter.referenced_collection_name.empty(); - if (is_referenced_filter) { + if (is_filter_result_initialized) { if (++result_index >= filter_result.count) { is_valid = false; return; @@ -380,15 +381,7 @@ void filter_result_iterator_t::next() { return; } - if (a_filter.field_name == "id") { - if (++result_index >= filter_result.count) { - is_valid = false; - return; - } - - seq_id = filter_result.docs[result_index]; - return; - } + const filter a_filter = filter_node->filter_exp; if (!index->field_is_indexed(a_filter.field_name)) { is_valid = false; @@ -397,16 +390,7 @@ void filter_result_iterator_t::next() { field f = index->search_schema.at(a_filter.field_name); - if (f.is_integer() || f.is_float() || f.is_bool()) { - result_index++; - if (result_index >= filter_result.count) { - is_valid = false; - return; - } - - seq_id = filter_result.docs[result_index]; - return; - } else if (f.is_string()) { + if (f.is_string()) { if (filter_node->filter_exp.apply_not_equals) { if (++seq_id < result_index) { return; @@ -443,6 +427,41 @@ void filter_result_iterator_t::next() { } } +void numeric_not_equals_filter(num_tree_t* const num_tree, + const int64_t value, + uint32_t*&& all_ids, + uint32_t&& all_ids_length, + uint32_t*& result_ids, + size_t& result_ids_len) { + uint32_t* to_exclude_ids = nullptr; + size_t to_exclude_ids_len = 0; + + num_tree->search(EQUALS, value, &to_exclude_ids, to_exclude_ids_len); + + result_ids_len = ArrayUtils::exclude_scalar(all_ids, all_ids_length, to_exclude_ids, to_exclude_ids_len, &result_ids); + + delete[] all_ids; + delete[] to_exclude_ids; +} + +void apply_not_equals(uint32_t*&& all_ids, + uint32_t&& all_ids_length, + uint32_t*& result_ids, + uint32_t& result_ids_len) { + + uint32_t* to_include_ids = nullptr; + size_t to_include_ids_len = 0; + + to_include_ids_len = ArrayUtils::exclude_scalar(all_ids, all_ids_length, result_ids, + result_ids_len, &to_include_ids); + + delete[] all_ids; + delete[] result_ids; + + result_ids = to_include_ids; + result_ids_len = to_include_ids_len; +} + void filter_result_iterator_t::init() { if (filter_node == nullptr) { return; @@ -487,6 +506,11 @@ void filter_result_iterator_t::init() { } seq_id = filter_result.docs[result_index]; + for (auto const& item: filter_result.reference_filter_results) { + reference[item.first] = item.second[result_index]; + } + + is_filter_result_initialized = true; return; } @@ -507,7 +531,22 @@ void filter_result_iterator_t::init() { filter_result.count = result_ids.size(); filter_result.docs = new uint32_t[result_ids.size()]; std::copy(result_ids.begin(), result_ids.end(), filter_result.docs); + seq_id = filter_result.docs[result_index]; + is_filter_result_initialized = true; + return; + } else if (a_filter.field_name == Index::SEQ_IDS_FIELD) { + if (index->seq_ids->num_ids() == 0) { + is_valid = false; + return; + } + + filter_result.count = index->seq_ids->num_ids(); + filter_result.docs = index->seq_ids->uncompress(); + + seq_id = filter_result.docs[result_index]; + is_filter_result_initialized = true; + return; } if (!index->field_is_indexed(a_filter.field_name)) { @@ -520,28 +559,40 @@ void filter_result_iterator_t::init() { if (f.is_integer()) { auto num_tree = index->numerical_index.at(a_filter.field_name); - // TODO: Handle not equals - for (size_t fi = 0; fi < a_filter.values.size(); fi++) { const std::string& filter_value = a_filter.values[fi]; int64_t value = (int64_t)std::stol(filter_value); + size_t result_size = filter_result.count; if (a_filter.comparators[fi] == RANGE_INCLUSIVE && fi+1 < a_filter.values.size()) { const std::string& next_filter_value = a_filter.values[fi + 1]; auto const range_end_value = (int64_t)std::stol(next_filter_value); - num_tree->range_inclusive_search(value, range_end_value, &filter_result.docs, - reinterpret_cast(filter_result.count)); + num_tree->range_inclusive_search(value, range_end_value, &filter_result.docs, result_size); fi++; + } else if (a_filter.comparators[fi] == NOT_EQUALS) { + numeric_not_equals_filter(num_tree, value, + index->seq_ids->uncompress(), index->seq_ids->num_ids(), + filter_result.docs, result_size); } else { - num_tree->search(a_filter.comparators[fi], value, - &filter_result.docs, reinterpret_cast(filter_result.count)); + num_tree->search(a_filter.comparators[fi], value, &filter_result.docs, result_size); } + + filter_result.count = result_size; + } + + if (a_filter.apply_not_equals) { + apply_not_equals(index->seq_ids->uncompress(), index->seq_ids->num_ids(), + filter_result.docs, filter_result.count); } if (filter_result.count == 0) { is_valid = false; return; } + + seq_id = filter_result.docs[result_index]; + is_filter_result_initialized = true; + return; } else if (f.is_float()) { auto num_tree = index->numerical_index.at(a_filter.field_name); @@ -550,22 +601,36 @@ void filter_result_iterator_t::init() { float value = (float)std::atof(filter_value.c_str()); int64_t float_int64 = Index::float_to_int64_t(value); + size_t result_size = filter_result.count; if (a_filter.comparators[fi] == RANGE_INCLUSIVE && fi+1 < a_filter.values.size()) { const std::string& next_filter_value = a_filter.values[fi+1]; int64_t range_end_value = Index::float_to_int64_t((float) std::atof(next_filter_value.c_str())); - num_tree->range_inclusive_search(float_int64, range_end_value, &filter_result.docs, - reinterpret_cast(filter_result.count)); + num_tree->range_inclusive_search(float_int64, range_end_value, &filter_result.docs, result_size); fi++; + } else if (a_filter.comparators[fi] == NOT_EQUALS) { + numeric_not_equals_filter(num_tree, float_int64, + index->seq_ids->uncompress(), index->seq_ids->num_ids(), + filter_result.docs, result_size); } else { - num_tree->search(a_filter.comparators[fi], float_int64, - &filter_result.docs, reinterpret_cast(filter_result.count)); + num_tree->search(a_filter.comparators[fi], float_int64, &filter_result.docs, result_size); } + + filter_result.count = result_size; + } + + if (a_filter.apply_not_equals) { + apply_not_equals(index->seq_ids->uncompress(), index->seq_ids->num_ids(), + filter_result.docs, filter_result.count); } if (filter_result.count == 0) { is_valid = false; return; } + + seq_id = filter_result.docs[result_index]; + is_filter_result_initialized = true; + return; } else if (f.is_bool()) { auto num_tree = index->numerical_index.at(a_filter.field_name); @@ -573,16 +638,32 @@ void filter_result_iterator_t::init() { for (const std::string& filter_value : a_filter.values) { int64_t bool_int64 = (filter_value == "1") ? 1 : 0; - num_tree->search(a_filter.comparators[value_index], bool_int64, - &filter_result.docs, reinterpret_cast(filter_result.count)); + size_t result_size = filter_result.count; + if (a_filter.comparators[value_index] == NOT_EQUALS) { + numeric_not_equals_filter(num_tree, bool_int64, + index->seq_ids->uncompress(), index->seq_ids->num_ids(), + filter_result.docs, result_size); + } else { + num_tree->search(a_filter.comparators[value_index], bool_int64, &filter_result.docs, result_size); + } + filter_result.count = result_size; value_index++; } + if (a_filter.apply_not_equals) { + apply_not_equals(index->seq_ids->uncompress(), index->seq_ids->num_ids(), + filter_result.docs, filter_result.count); + } + if (filter_result.count == 0) { is_valid = false; return; } + + seq_id = filter_result.docs[result_index]; + is_filter_result_initialized = true; + return; } else if (f.is_string()) { art_tree* t = index->search_index.at(a_filter.field_name); @@ -684,13 +765,13 @@ bool filter_result_iterator_t::valid() { } } - const filter a_filter = filter_node->filter_exp; - - if (!a_filter.referenced_collection_name.empty() || a_filter.field_name == "id") { + if (is_filter_result_initialized) { is_valid = result_index < filter_result.count; return is_valid; } + const filter a_filter = filter_node->filter_exp; + if (!index->field_is_indexed(a_filter.field_name)) { is_valid = false; return is_valid; @@ -698,10 +779,7 @@ bool filter_result_iterator_t::valid() { field f = index->search_schema.at(a_filter.field_name); - if (f.is_integer() || f.is_float() || f.is_bool()) { - is_valid = result_index < filter_result.count; - return is_valid; - } else if (f.is_string()) { + if (f.is_string()) { if (filter_node->filter_exp.apply_not_equals) { return seq_id < result_index; } @@ -741,10 +819,7 @@ void filter_result_iterator_t::skip_to(uint32_t id) { return; } - const filter a_filter = filter_node->filter_exp; - - bool is_referenced_filter = !a_filter.referenced_collection_name.empty(); - if (is_referenced_filter) { + if (is_filter_result_initialized) { while (filter_result.docs[result_index] < id && ++result_index < filter_result.count); if (result_index >= filter_result.count) { @@ -761,17 +836,7 @@ void filter_result_iterator_t::skip_to(uint32_t id) { return; } - if (a_filter.field_name == "id") { - while (filter_result.docs[result_index] < id && ++result_index < filter_result.count); - - if (result_index >= filter_result.count) { - is_valid = false; - return; - } - - seq_id = filter_result.docs[result_index]; - return; - } + const filter a_filter = filter_node->filter_exp; if (!index->field_is_indexed(a_filter.field_name)) { is_valid = false; @@ -780,17 +845,7 @@ void filter_result_iterator_t::skip_to(uint32_t id) { field f = index->search_schema.at(a_filter.field_name); - if (f.is_integer() || f.is_float() || f.is_bool()) { - while(result_index < filter_result.count && filter_result.docs[result_index] < id) { - result_index++; - } - - if (result_index >= filter_result.count) { - is_valid = false; - } - - return; - } else if (f.is_string()) { + if (f.is_string()) { if (filter_node->filter_exp.apply_not_equals) { if (id < seq_id) { return; @@ -897,7 +952,7 @@ bool filter_result_iterator_t::contains_atleast_one(const void *obj) { compact_posting_list_t* list = COMPACT_POSTING_PTR(obj); size_t i = 0; - while(i < list->length && valid()) { + while(i < list->length && is_valid) { size_t num_existing_offsets = list->id_offsets[i]; size_t existing_id = list->id_offsets[i + num_existing_offsets + 1]; @@ -916,7 +971,7 @@ bool filter_result_iterator_t::contains_atleast_one(const void *obj) { auto list = (posting_list_t*)(obj); posting_list_t::iterator_t it = list->new_iterator(); - while(it.valid() && valid()) { + while(it.valid() && is_valid) { uint32_t id = it.id(); if(id == seq_id) { @@ -943,6 +998,7 @@ void filter_result_iterator_t::reset() { // Reset the subtrees then apply operators to arrive at the first valid doc. left_it->reset(); right_it->reset(); + is_valid = true; if (filter_node->filter_operator == AND) { and_filter_iterators(); @@ -953,10 +1009,7 @@ void filter_result_iterator_t::reset() { return; } - const filter a_filter = filter_node->filter_exp; - - bool is_referenced_filter = !a_filter.referenced_collection_name.empty(); - if (is_referenced_filter || a_filter.field_name == "id") { + if (is_filter_result_initialized) { if (filter_result.count == 0) { is_valid = false; return; @@ -964,27 +1017,25 @@ void filter_result_iterator_t::reset() { result_index = 0; seq_id = filter_result.docs[result_index]; + + reference.clear(); + for (auto const& item: filter_result.reference_filter_results) { + reference[item.first] = item.second[result_index]; + } + is_valid = true; return; } + const filter a_filter = filter_node->filter_exp; + if (!index->field_is_indexed(a_filter.field_name)) { return; } field f = index->search_schema.at(a_filter.field_name); - if (f.is_integer() || f.is_float() || f.is_bool()) { - if (filter_result.count == 0) { - is_valid = false; - return; - } - - result_index = 0; - seq_id = filter_result.docs[result_index]; - is_valid = true; - return; - } else if (f.is_string()) { + if (f.is_string()) { posting_list_iterators.clear(); for(auto expanded_plist: expanded_plists) { delete expanded_plist; @@ -997,11 +1048,11 @@ void filter_result_iterator_t::reset() { } uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) { - if (!valid()) { + if (!is_valid) { return 0; } - if (can_get_ids()) { + if (is_filter_result_initialized) { filter_array = new uint32_t[filter_result.count]; std::copy(filter_result.docs, filter_result.docs + filter_result.count, filter_array); return filter_result.count; @@ -1011,7 +1062,7 @@ uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) { do { filter_ids.push_back(seq_id); next(); - } while (valid()); + } while (is_valid); filter_array = new uint32_t[filter_ids.size()]; std::copy(filter_ids.begin(), filter_ids.end(), filter_array); @@ -1020,11 +1071,11 @@ uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) { } uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& lenA, uint32_t*& results) { - if (!valid()) { + if (!is_valid) { return 0; } - if (can_get_ids()) { + if (is_filter_result_initialized) { return ArrayUtils::and_scalar(A, lenA, filter_result.docs, filter_result.count, &results); } @@ -1115,20 +1166,23 @@ filter_result_iterator_t &filter_result_iterator_t::operator=(filter_result_iter seq_id = obj.seq_id; reference = std::move(obj.reference); status = std::move(obj.status); + is_filter_result_initialized = obj.is_filter_result_initialized; return *this; } -bool filter_result_iterator_t::can_get_ids() { - if (!filter_node->isOperator) { - const filter a_filter = filter_node->filter_exp; - field f = index->search_schema.at(a_filter.field_name); - - if (!a_filter.referenced_collection_name.empty() || a_filter.field_name == "id" || - (index->field_is_indexed(a_filter.field_name) && (f.is_integer() || f.is_float() || f.is_bool()))) { - return true; +void filter_result_iterator_t::get_n_ids(const uint32_t& n, std::vector& results) { + if (is_filter_result_initialized) { + for (uint32_t count = 0; count < n && result_index < filter_result.count; count++) { + results.push_back(filter_result.docs[result_index++]); } + + is_valid = result_index < filter_result.count; + return; } - return false; + for (uint32_t count = 0; count < n && is_valid; count++) { + results.push_back(seq_id); + next(); + } } diff --git a/src/index.cpp b/src/index.cpp index 83b23c2f..9e041f8e 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -2741,7 +2741,7 @@ Option Index::search(std::vector& field_query_tokens, cons return filter_init_op; } - if (filter_tree_root != nullptr && !filter_result_iterator.valid()) { + if (filter_tree_root != nullptr && !filter_result_iterator.is_valid) { return Option(true); } @@ -2806,7 +2806,7 @@ Option Index::search(std::vector& field_query_tokens, cons // for phrase query, parser will set field_query_tokens to "*", need to handle that if (is_wildcard_query && field_query_tokens[0].q_phrases.empty()) { const uint8_t field_id = (uint8_t)(FIELD_LIMIT_NUM - 0); - bool no_filters_provided = (filter_tree_root == nullptr && !filter_result_iterator.valid()); + bool no_filters_provided = (filter_tree_root == nullptr && !filter_result_iterator.is_valid); if(no_filters_provided && facets.empty() && curated_ids.empty() && vector_query.field_name.empty() && sort_fields_std.size() == 1 && sort_fields_std[0].name == sort_field_const::seq_id && @@ -2855,11 +2855,9 @@ Option Index::search(std::vector& field_query_tokens, cons store, doc_id_prefix, filter_tree_root); filter_result_iterator = filter_result_iterator_t(collection_name, this, filter_tree_root); + approx_filter_ids_length = filter_result_iterator.is_valid; } -// TODO: Curate ids at last -// curate_filtered_ids(curated_ids, excluded_result_ids, -// excluded_result_ids_size, filter_result.docs, filter_result.count, curated_ids_sorted); collate_included_ids({}, included_ids_map, curated_topster, searched_queries); if (!vector_query.field_name.empty()) { @@ -2875,8 +2873,7 @@ Option Index::search(std::vector& field_query_tokens, cons uint32_t filter_id_count = 0; while (!no_filters_provided && - filter_id_count < vector_query.flat_search_cutoff && - filter_result_iterator.valid()) { + filter_id_count < vector_query.flat_search_cutoff && filter_result_iterator.is_valid) { auto seq_id = filter_result_iterator.seq_id; std::vector values; @@ -2904,7 +2901,7 @@ Option Index::search(std::vector& field_query_tokens, cons } if(no_filters_provided || - (filter_id_count >= vector_query.flat_search_cutoff && filter_result_iterator.valid())) { + (filter_id_count >= vector_query.flat_search_cutoff && filter_result_iterator.is_valid)) { dist_labels.clear(); VectorFilterFunctor filterFunctor(&filter_result_iterator); @@ -2973,7 +2970,19 @@ Option Index::search(std::vector& field_query_tokens, cons all_result_ids, all_result_ids_len, filter_result_iterator, approx_filter_ids_length, concurrency, sort_order, field_values, geopoint_indices); + filter_result_iterator.reset(); } + + // filter tree was initialized to have all sequence ids in this flow. + if (no_filters_provided) { + delete filter_tree_root; + filter_tree_root = nullptr; + } + + uint32_t _all_result_ids_len = all_result_ids_len; + curate_filtered_ids(curated_ids, excluded_result_ids, + excluded_result_ids_size, all_result_ids, _all_result_ids_len, curated_ids_sorted); + all_result_ids_len = _all_result_ids_len; } else { // Non-wildcard // In multi-field searches, a record can be matched across different fields, so we use this for aggregation @@ -3414,7 +3423,7 @@ void Index::process_curated_ids(const std::vector> // if `filter_curated_hits` is enabled, we will remove curated hits that don't match filter condition std::set included_ids_set; - if(filter_result_iterator.valid() && filter_curated_hits) { + if(filter_result_iterator.is_valid && filter_curated_hits) { for (const auto &included_id: included_ids_vec) { auto result = filter_result_iterator.valid(included_id); @@ -3683,6 +3692,7 @@ void Index::fuzzy_search_fields(const std::vector& the_fields, art_fuzzy_search_i(search_index.at(the_field.name), (const unsigned char *) token.c_str(), token_len, costs[token_index], costs[token_index], max_candidates, token_order, prefix_search, false, "", filter_result_iterator, field_leaves, unique_tokens); + filter_result_iterator.reset(); if(field_leaves.empty()) { // look at the next field @@ -4649,7 +4659,7 @@ void Index::do_infix_search(const size_t num_search_fields, const std::vector batch_result_ids; batch_result_ids.reserve(window_size); - if (filter_result_iterator.can_get_ids()) { - while (batch_result_ids.size() < window_size && filter_index < filter_result_iterator.get_length()) { - batch_result_ids.push_back(filter_result_iterator.get_ids()[filter_index++]); - } - } else { - do { - batch_result_ids.push_back(filter_result_iterator.seq_id); - filter_result_iterator.next(); - } while (batch_result_ids.size() < window_size && filter_result_iterator.valid()); - } + filter_result_iterator.get_n_ids(window_size, batch_result_ids); num_queued++; diff --git a/test/filter_test.cpp b/test/filter_test.cpp index 6cab88b5..86cdd0f5 100644 --- a/test/filter_test.cpp +++ b/test/filter_test.cpp @@ -65,7 +65,7 @@ TEST_F(FilterTest, FilterTreeIterator) { auto iter_null_filter_tree_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); ASSERT_TRUE(iter_null_filter_tree_test.init_status().ok()); - ASSERT_FALSE(iter_null_filter_tree_test.valid()); + ASSERT_FALSE(iter_null_filter_tree_test.is_valid); Option filter_op = filter::parse_filter_query("name: foo", coll->get_schema(), store, doc_id_prefix, filter_tree_root); @@ -74,7 +74,7 @@ TEST_F(FilterTest, FilterTreeIterator) { auto iter_no_match_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); ASSERT_TRUE(iter_no_match_test.init_status().ok()); - ASSERT_FALSE(iter_no_match_test.valid()); + ASSERT_FALSE(iter_no_match_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -85,7 +85,7 @@ TEST_F(FilterTest, FilterTreeIterator) { auto iter_no_match_multi_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); ASSERT_TRUE(iter_no_match_multi_test.init_status().ok()); - ASSERT_FALSE(iter_no_match_multi_test.valid()); + ASSERT_FALSE(iter_no_match_multi_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -97,11 +97,11 @@ TEST_F(FilterTest, FilterTreeIterator) { ASSERT_TRUE(iter_contains_test.init_status().ok()); for (uint32_t i = 0; i < 5; i++) { - ASSERT_TRUE(iter_contains_test.valid()); + ASSERT_TRUE(iter_contains_test.is_valid); ASSERT_EQ(i, iter_contains_test.seq_id); iter_contains_test.next(); } - ASSERT_FALSE(iter_contains_test.valid()); + ASSERT_FALSE(iter_contains_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -113,11 +113,11 @@ TEST_F(FilterTest, FilterTreeIterator) { ASSERT_TRUE(iter_contains_multi_test.init_status().ok()); for (uint32_t i = 0; i < 5; i++) { - ASSERT_TRUE(iter_contains_multi_test.valid()); + ASSERT_TRUE(iter_contains_multi_test.is_valid); ASSERT_EQ(i, iter_contains_multi_test.seq_id); iter_contains_multi_test.next(); } - ASSERT_FALSE(iter_contains_multi_test.valid()); + ASSERT_FALSE(iter_contains_multi_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -129,11 +129,11 @@ TEST_F(FilterTest, FilterTreeIterator) { ASSERT_TRUE(iter_exact_match_1_test.init_status().ok()); for (uint32_t i = 0; i < 5; i++) { - ASSERT_TRUE(iter_exact_match_1_test.valid()); + ASSERT_TRUE(iter_exact_match_1_test.is_valid); ASSERT_EQ(i, iter_exact_match_1_test.seq_id); iter_exact_match_1_test.next(); } - ASSERT_FALSE(iter_exact_match_1_test.valid()); + ASSERT_FALSE(iter_exact_match_1_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -143,7 +143,7 @@ TEST_F(FilterTest, FilterTreeIterator) { auto iter_exact_match_2_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); ASSERT_TRUE(iter_exact_match_2_test.init_status().ok()); - ASSERT_FALSE(iter_exact_match_2_test.valid()); + ASSERT_FALSE(iter_exact_match_2_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -156,11 +156,11 @@ TEST_F(FilterTest, FilterTreeIterator) { std::vector expected = {0, 2, 3, 4}; for (auto const& i : expected) { - ASSERT_TRUE(iter_exact_match_multi_test.valid()); + ASSERT_TRUE(iter_exact_match_multi_test.is_valid); ASSERT_EQ(i, iter_exact_match_multi_test.seq_id); iter_exact_match_multi_test.next(); } - ASSERT_FALSE(iter_exact_match_multi_test.valid()); + ASSERT_FALSE(iter_exact_match_multi_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -173,12 +173,12 @@ TEST_F(FilterTest, FilterTreeIterator) { expected = {1, 3}; for (auto const& i : expected) { - ASSERT_TRUE(iter_not_equals_test.valid()); + ASSERT_TRUE(iter_not_equals_test.is_valid); ASSERT_EQ(i, iter_not_equals_test.seq_id); iter_not_equals_test.next(); } - ASSERT_FALSE(iter_not_equals_test.valid()); + ASSERT_FALSE(iter_not_equals_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -189,13 +189,13 @@ TEST_F(FilterTest, FilterTreeIterator) { auto iter_skip_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); ASSERT_TRUE(iter_skip_test.init_status().ok()); - ASSERT_TRUE(iter_skip_test.valid()); + ASSERT_TRUE(iter_skip_test.is_valid); iter_skip_test.skip_to(3); - ASSERT_TRUE(iter_skip_test.valid()); + ASSERT_TRUE(iter_skip_test.is_valid); ASSERT_EQ(4, iter_skip_test.seq_id); iter_skip_test.next(); - ASSERT_FALSE(iter_skip_test.valid()); + ASSERT_FALSE(iter_skip_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -206,11 +206,11 @@ TEST_F(FilterTest, FilterTreeIterator) { auto iter_and_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); ASSERT_TRUE(iter_and_test.init_status().ok()); - ASSERT_TRUE(iter_and_test.valid()); + ASSERT_TRUE(iter_and_test.is_valid); ASSERT_EQ(1, iter_and_test.seq_id); iter_and_test.next(); - ASSERT_FALSE(iter_and_test.valid()); + ASSERT_FALSE(iter_and_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -234,12 +234,12 @@ TEST_F(FilterTest, FilterTreeIterator) { expected = {2, 4, 5}; for (auto const& i : expected) { - ASSERT_TRUE(iter_or_test.valid()); + ASSERT_TRUE(iter_or_test.is_valid); ASSERT_EQ(i, iter_or_test.seq_id); iter_or_test.next(); } - ASSERT_FALSE(iter_or_test.valid()); + ASSERT_FALSE(iter_or_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -250,17 +250,17 @@ TEST_F(FilterTest, FilterTreeIterator) { auto iter_skip_complex_filter_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); ASSERT_TRUE(iter_skip_complex_filter_test.init_status().ok()); - ASSERT_TRUE(iter_skip_complex_filter_test.valid()); + ASSERT_TRUE(iter_skip_complex_filter_test.is_valid); iter_skip_complex_filter_test.skip_to(4); expected = {4, 5}; for (auto const& i : expected) { - ASSERT_TRUE(iter_skip_complex_filter_test.valid()); + ASSERT_TRUE(iter_skip_complex_filter_test.is_valid); ASSERT_EQ(i, iter_skip_complex_filter_test.seq_id); iter_skip_complex_filter_test.next(); } - ASSERT_FALSE(iter_skip_complex_filter_test.valid()); + ASSERT_FALSE(iter_skip_complex_filter_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -358,20 +358,20 @@ TEST_F(FilterTest, FilterTreeIterator) { expected = {0, 2, 3, 4}; for (auto const& i : expected) { - ASSERT_TRUE(iter_reset_test.valid()); + ASSERT_TRUE(iter_reset_test.is_valid); ASSERT_EQ(i, iter_reset_test.seq_id); iter_reset_test.next(); } - ASSERT_FALSE(iter_reset_test.valid()); + ASSERT_FALSE(iter_reset_test.is_valid); iter_reset_test.reset(); for (auto const& i : expected) { - ASSERT_TRUE(iter_reset_test.valid()); + ASSERT_TRUE(iter_reset_test.is_valid); ASSERT_EQ(i, iter_reset_test.seq_id); iter_reset_test.next(); } - ASSERT_FALSE(iter_reset_test.valid()); + ASSERT_FALSE(iter_reset_test.is_valid); auto iter_move_assignment_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root); @@ -380,11 +380,11 @@ TEST_F(FilterTest, FilterTreeIterator) { expected = {0, 2, 3, 4}; for (auto const& i : expected) { - ASSERT_TRUE(iter_move_assignment_test.valid()); + ASSERT_TRUE(iter_move_assignment_test.is_valid); ASSERT_EQ(i, iter_move_assignment_test.seq_id); iter_move_assignment_test.next(); } - ASSERT_FALSE(iter_move_assignment_test.valid()); + ASSERT_FALSE(iter_move_assignment_test.is_valid); delete filter_tree_root; filter_tree_root = nullptr; @@ -405,7 +405,7 @@ TEST_F(FilterTest, FilterTreeIterator) { for (uint32_t i = 0; i < filter_ids_length; i++) { ASSERT_EQ(expected[i], filter_ids[i]); } - ASSERT_FALSE(iter_to_array_test.valid()); + ASSERT_FALSE(iter_to_array_test.is_valid); delete filter_ids; @@ -422,7 +422,7 @@ TEST_F(FilterTest, FilterTreeIterator) { for (uint32_t i = 0; i < and_result_length; i++) { ASSERT_EQ(expected[i], and_result[i]); } - ASSERT_FALSE(iter_and_scalar_test.valid()); + ASSERT_FALSE(iter_and_scalar_test.is_valid); delete and_result; delete filter_tree_root;