From d796391464cc6e044c91cff1f9209220364370f6 Mon Sep 17 00:00:00 2001 From: Harpreet Sangar Date: Tue, 11 Apr 2023 17:44:32 +0530 Subject: [PATCH] Add `approx_filter_ids_length` field. --- include/filter_result_iterator.h | 10 ++++++++-- src/filter_result_iterator.cpp | 6 ++++-- src/index.cpp | 3 ++- src/or_iterator.cpp | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/include/filter_result_iterator.h b/include/filter_result_iterator.h index b67d67ca..f5ae22a6 100644 --- a/include/filter_result_iterator.h +++ b/include/filter_result_iterator.h @@ -125,12 +125,18 @@ private: public: uint32_t seq_id = 0; - // Collection name -> references + /// Collection name -> references std::map reference; Option status = Option(true); + /// Holds the upper-bound of the number of seq ids this iterator would match. + /// Useful in a scenario where we need to differentiate between filter iterator not matching any document vs. filter + /// iterator reaching its end. (is_valid would be false in both these cases) + uint32_t approx_filter_ids_length; + explicit filter_result_iterator_t(const std::string collection_name, - Index const* const index, filter_node_t const* const filter_node); + Index const* const index, filter_node_t const* const filter_node, + uint32_t approx_filter_ids_length = UINT32_MAX); ~filter_result_iterator_t(); diff --git a/src/filter_result_iterator.cpp b/src/filter_result_iterator.cpp index 21a4128f..f5bd8b5f 100644 --- a/src/filter_result_iterator.cpp +++ b/src/filter_result_iterator.cpp @@ -835,10 +835,12 @@ uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& } filter_result_iterator_t::filter_result_iterator_t(const std::string collection_name, const Index *const index, - const filter_node_t *const filter_node) : + const filter_node_t *const filter_node, + uint32_t approx_filter_ids_length) : collection_name(collection_name), index(index), - filter_node(filter_node) { + filter_node(filter_node), 
approx_filter_ids_length(approx_filter_ids_length) { if (filter_node == nullptr) { is_valid = false; return; diff --git a/src/index.cpp b/src/index.cpp index 29938911..ab8f8576 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -2713,7 +2713,8 @@ Option Index::search(std::vector& field_query_tokens, cons return rearrange_op; } - auto filter_result_iterator = filter_result_iterator_t(collection_name, this, filter_tree_root); + auto filter_result_iterator = filter_result_iterator_t(collection_name, this, filter_tree_root, + approx_filter_ids_length); auto filter_init_op = filter_result_iterator.init_status(); if (!filter_init_op.ok()) { return filter_init_op; diff --git a/src/or_iterator.cpp b/src/or_iterator.cpp index df4404cb..8dd9d487 100644 --- a/src/or_iterator.cpp +++ b/src/or_iterator.cpp @@ -208,7 +208,7 @@ bool or_iterator_t::take_id(result_iter_state_t& istate, uint32_t id, bool& is_e return false; } - if (istate.fit != nullptr) { + if (istate.fit != nullptr && istate.fit->approx_filter_ids_length > 0) { return (istate.fit->valid(id) == 1); }