From 034f20f4e4670447fd51755fdfa6890601f0de42 Mon Sep 17 00:00:00 2001 From: Harpreet Sangar Date: Tue, 18 Apr 2023 16:53:24 +0530 Subject: [PATCH] Expose filter ids from iterator where possible. --- include/filter_result_iterator.h | 10 ++++++++++ src/filter_result_iterator.cpp | 33 +++++++++++++++++++++----------- src/index.cpp | 21 +++++++++++++------- 3 files changed, 46 insertions(+), 18 deletions(-) diff --git a/include/filter_result_iterator.h b/include/filter_result_iterator.h index 46ec11bc..bd9e66f0 100644 --- a/include/filter_result_iterator.h +++ b/include/filter_result_iterator.h @@ -127,6 +127,14 @@ private: void doc_matching_string_filter(bool field_is_array); public: + uint32_t* get_ids() { + return filter_result.docs; + } + + uint32_t get_length() { + return filter_result.count; + } + uint32_t seq_id = 0; /// Collection name -> references std::map reference; @@ -180,4 +188,6 @@ public: /// Performs AND with the contents of A and allocates a new array of results. /// \return size of the results array uint32_t and_scalar(const uint32_t* A, const uint32_t& lenA, uint32_t*& results); + + bool can_get_ids(); }; diff --git a/src/filter_result_iterator.cpp b/src/filter_result_iterator.cpp index d57c8895..787ad578 100644 --- a/src/filter_result_iterator.cpp +++ b/src/filter_result_iterator.cpp @@ -1001,17 +1001,10 @@ uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) { return 0; } - if (!filter_node->isOperator) { - const filter a_filter = filter_node->filter_exp; - field f = index->search_schema.at(a_filter.field_name); - - if (!a_filter.referenced_collection_name.empty() || a_filter.field_name == "id" || - (index->field_is_indexed(a_filter.field_name) && (f.is_integer() || f.is_float() || f.is_bool()))) { - - filter_array = new uint32_t[filter_result.count]; - std::copy(filter_result.docs, filter_result.docs + filter_result.count, filter_array); - return filter_result.count; - } + if (can_get_ids()) { + filter_array = new uint32_t[filter_result.count]; + std::copy(filter_result.docs, filter_result.docs + filter_result.count, filter_array); + return filter_result.count; } std::vector filter_ids; @@ -1031,6 +1024,10 @@ uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& return 0; } + if (can_get_ids()) { + return ArrayUtils::and_scalar(A, lenA, filter_result.docs, filter_result.count, &results); + } + std::vector filter_ids; for (uint32_t i = 0; i < lenA; i++) { auto result = valid(A[i]); @@ -1121,3 +1118,17 @@ filter_result_iterator_t &filter_result_iterator_t::operator=(filter_result_iter return *this; } + +bool filter_result_iterator_t::can_get_ids() { + if (!filter_node->isOperator) { + const filter a_filter = filter_node->filter_exp; + field f = index->search_schema.at(a_filter.field_name); + + if (!a_filter.referenced_collection_name.empty() || a_filter.field_name == "id" || + (index->field_is_indexed(a_filter.field_name) && (f.is_integer() || f.is_float() || f.is_bool()))) { + return true; + } + } + + return false; +} diff --git a/src/index.cpp b/src/index.cpp index 824c1723..83b23c2f 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -4977,14 +4977,23 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root, const auto parent_search_stop_ms = search_stop_us; auto parent_search_cutoff = search_cutoff; - for(size_t thread_id = 0; thread_id < num_threads && filter_result_iterator.valid(); thread_id++) { + for(size_t thread_id = 0; thread_id < num_threads && + (filter_result_iterator.can_get_ids() ? + filter_index < filter_result_iterator.get_length() : + filter_result_iterator.valid()); thread_id++) { std::vector batch_result_ids; batch_result_ids.reserve(window_size); - do { - batch_result_ids.push_back(filter_result_iterator.seq_id); - filter_result_iterator.next(); - } while (batch_result_ids.size() < window_size && filter_result_iterator.valid()); + if (filter_result_iterator.can_get_ids()) { + while (batch_result_ids.size() < window_size && filter_index < filter_result_iterator.get_length()) { + batch_result_ids.push_back(filter_result_iterator.get_ids()[filter_index++]); + } + } else { + do { + batch_result_ids.push_back(filter_result_iterator.seq_id); + filter_result_iterator.next(); + } while (batch_result_ids.size() < window_size && filter_result_iterator.valid()); + } num_queued++; @@ -5045,8 +5054,6 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root, parent_search_cutoff = parent_search_cutoff || search_cutoff; cv_process.notify_one(); }); - - filter_index += batch_result_ids.size(); } std::unique_lock lock_process(m_process);