From 72b8d4c26ece8280016583de1085120b10eea5c7 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Sun, 22 Jan 2023 12:02:29 +0530
Subject: [PATCH 01/51] Filter by reference.

---
 include/collection.h |  2 ++
 src/collection.cpp   | 16 +++++++++++
 src/field.cpp        | 65 +++++++++++++++++++++++++++++++++++++-------
 3 files changed, 73 insertions(+), 10 deletions(-)
diff --git a/include/collection.h b/include/collection.h
index 7d31e9e5..977a83dc 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -463,6 +463,8 @@ public:
 
     Option<bool> validate_reference_filter(const std::string& filter_query) const;
 
+    Option<bool> validate_reference_filter(const std::string& filter_query) const;
+
     Option<nlohmann::json> get(const std::string & id) const;
 
     Option<std::string> remove(const std::string & id, bool remove_from_store = true);
diff --git a/src/collection.cpp b/src/collection.cpp
index 6eef0147..737341f2 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -2584,6 +2584,22 @@ Option<bool> Collection::validate_reference_filter(const std::string& filter_que
     return Option<bool>(true);
 }
 
+Option<bool> Collection::validate_reference_filter(const std::string& filter_query) const {
+    std::shared_lock lock(mutex);
+
+    const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
+    filter_node_t* filter_tree_root = nullptr;
+    Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
+                                                        store, doc_id_prefix, filter_tree_root);
+
+    if(!filter_op.ok()) {
+        return filter_op;
+    }
+
+    delete filter_tree_root;
+    return Option<bool>(true);
+}
+
 bool Collection::facet_value_to_string(const facet &a_facet, const facet_count_t &facet_count,
                                        const nlohmann::json &document, std::string &value) const {
 
diff --git a/src/field.cpp b/src/field.cpp
index 89e7e563..d9a85890 100644
--- a/src/field.cpp
+++ b/src/field.cpp
@@ -388,24 +388,27 @@ Option<bool> toParseTree(std::queue<std::string>& postfix, filter_node_t*& root,
                          int& and_operator_count,
                          int& or_operator_count) {
     std::stack<filter_node_t*> nodeStack;
+    bool is_successful = true;
+    std::string error_message;
 
     while (!postfix.empty()) {
         const std::string expression = postfix.front();
         postfix.pop();
 
-        filter_node_t* filter_node = nullptr;
+        filter_node_t *filter_node = nullptr;
         if (isOperator(expression)) {
-            auto message = "Could not parse the filter query: unbalanced `" + expression + "` operands.";
-
             if (nodeStack.empty()) {
-                return Option<bool>(400, message);
+                is_successful = false;
+                error_message = "Could not parse the filter query: unbalanced `" + expression + "` operands.";
+                break;
             }
             auto operandB = nodeStack.top();
             nodeStack.pop();
 
             if (nodeStack.empty()) {
-                delete operandB;
-                return Option<bool>(400, message);
+                is_successful = false;
+                error_message = "Could not parse the filter query: unbalanced `" + expression + "` operands.";
+                break;
             }
             auto operandA = nodeStack.top();
             nodeStack.pop();
@@ -414,7 +417,6 @@ Option<bool> toParseTree(std::queue<std::string>& postfix, filter_node_t*& root,
             filter_node = new filter_node_t(expression == "&&" ? AND : OR, operandA, operandB);
         } else {
             filter filter_exp;
-<<<<<<< HEAD
 
             // Expected value: $Collection(...)
             bool is_referenced_filter = (expression[0] == '$' && expression[expression.size() - 1] == ')');
@@ -439,7 +441,7 @@ Option<bool> toParseTree(std::queue<std::string>& postfix, filter_node_t*& root,
             } else {
                 Option<bool> toFilter_op = toFilter(expression, filter_exp, search_schema, store, doc_id_prefix);
                 if (!toFilter_op.ok()) {
-		    while(!nodeStack.empty()) {
+		            while(!nodeStack.empty()) {
                         auto filterNode = nodeStack.top();
                         delete filterNode;
                         nodeStack.pop();
@@ -448,17 +450,60 @@ Option<bool> toParseTree(std::queue<std::string>& postfix, filter_node_t*& root,
                 }
             }
 
-            filter_node = new filter_node_t(filter_exp);
+            // Expected value: $Collection(...)
+            bool is_referenced_filter = (expression[0] == '$' && expression[expression.size() - 1] == ')');
+            if (is_referenced_filter) {
+                size_t parenthesis_index = expression.find('(');
+
+                std::string collection_name = expression.substr(1, parenthesis_index - 1);
+                auto &cm = CollectionManager::get_instance();
+                auto collection = cm.get_collection(collection_name);
+                if (collection == nullptr) {
+                    is_successful = false;
+                    error_message = "Referenced collection `" + collection_name + "` not found.";
+                    break;
+                }
+
+                filter_exp = {expression.substr(parenthesis_index + 1, expression.size() - parenthesis_index - 2)};
+                filter_exp.referenced_collection_name = collection_name;
+
+                auto op = collection->validate_reference_filter(filter_exp.field_name);
+                if (!op.ok()) {
+                    is_successful = false;
+                    error_message = "Failed to parse reference filter on `" + collection_name + "` collection: " +
+                                        op.error();
+                    break;
+                }
+            } else {
+                Option<bool> toFilter_op = toFilter(expression, filter_exp, search_schema, store, doc_id_prefix);
+                if (!toFilter_op.ok()) {
+                    is_successful = false;
+                    error_message = toFilter_op.error();
+                    break;
+                }
+
+                filter_node = new filter_node_t(filter_exp);
+            }
         }
 
         nodeStack.push(filter_node);
     }
 
+    if (!is_successful) {
+        while (!nodeStack.empty()) {
+            auto filterNode = nodeStack.top();
+            delete filterNode;
+            nodeStack.pop();
+        }
+
+        return Option<bool>(400, error_message);
+    }
+
     if (nodeStack.empty()) {
         return Option<bool>(400, "Filter query cannot be empty.");
     }
-
     root = nodeStack.top();
+
     return Option<bool>(true);
 }
 

From ad958be7bb690f82c7f57ed76fceb37e30b1d7b6 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 27 Jan 2023 12:57:13 +0530
Subject: [PATCH 02/51] Add `Index::rearranging_recursive_filter`.

---
 include/field.h | 1 +
 src/index.cpp   | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/field.h b/include/field.h
index 37fa1936..7a90fd9d 100644
--- a/include/field.h
+++ b/include/field.h
@@ -606,6 +606,7 @@ struct filter_node_t {
     bool isOperator;
     filter_node_t* left = nullptr;
     filter_node_t* right = nullptr;
+    filter_tree_metrics* metrics = nullptr;
 
     filter_node_t(filter filter_exp)
             : filter_exp(std::move(filter_exp)),
diff --git a/src/index.cpp b/src/index.cpp
index 0379ae43..1b20867a 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1830,7 +1830,6 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
     if (root == nullptr) {
         return Option(true);
     }
-
     if (root->isOperator) {
         uint32_t l_filter_ids_length = 0;
         if (root->left != nullptr) {

From f4b8912e19857138298bcbeef0a342f3dab73266 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 27 Jan 2023 19:58:06 +0530
Subject: [PATCH 03/51] Add `Index::adaptive_filter`.

---
 src/index.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/index.cpp b/src/index.cpp
index 1b20867a..0379ae43 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1830,6 +1830,7 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
     if (root == nullptr) {
         return Option(true);
     }
+
     if (root->isOperator) {
         uint32_t l_filter_ids_length = 0;
         if (root->left != nullptr) {

From cdfa3b7a708c556772c1ec34714bb11eb9a907e2 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 3 Feb 2023 14:30:17 +0530
Subject: [PATCH 04/51] Fix double locking of collection mutex.

---
 src/collection.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/collection.cpp b/src/collection.cpp
index 737341f2..95190ac7 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1515,7 +1515,6 @@ Option<nlohmann::json> Collection::search(std::string  raw_query,
     }
 
     // for grouping we have to re-aggregate
-
     Topster& topster = *search_params->topster;
     Topster& curated_topster = *search_params->curated_topster;
 

From 2d39461ecac47058f962e0390bc7eebca21b9fdc Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Mon, 27 Feb 2023 11:00:25 +0530
Subject: [PATCH 05/51] Fix rebase error.

---
 src/field.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/field.cpp b/src/field.cpp
index d9a85890..729ae55f 100644
--- a/src/field.cpp
+++ b/src/field.cpp
@@ -481,9 +481,9 @@ Option<bool> toParseTree(std::queue<std::string>& postfix, filter_node_t*& root,
                     error_message = toFilter_op.error();
                     break;
                 }
-
-                filter_node = new filter_node_t(filter_exp);
             }
+
+            filter_node = new filter_node_t(filter_exp);
         }
 
         nodeStack.push(filter_node);

From e78d20991195536eed97192b52cc2c82858ea4af Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 3 Mar 2023 10:37:33 +0530
Subject: [PATCH 06/51] Add `filter_result_t` struct. Add
 `reference_filter_result_t` struct. Add support for lazy filtering. Update
 `rearrange_filter_tree` to return approximate count of filter matches.

---
 .bazelrc                      |   2 -
 include/collection.h          |   6 +-
 include/field.h               |  11 +-
 include/index.h               |  27 ++-
 include/num_tree.h            |  27 +++
 include/posting.h             |   4 +-
 include/topster.h             |   7 +-
 src/collection.cpp            |  24 +--
 src/field.cpp                 |  32 ----
 src/index.cpp                 | 323 ++++++++++++++++++++++++++++------
 src/num_tree.cpp              | 172 ++++++++++++++++++
 src/posting.cpp               |  27 ++-
 test/collection_join_test.cpp |  10 +-
 13 files changed, 541 insertions(+), 131 deletions(-)

diff --git a/.bazelrc b/.bazelrc
index 0a7fa3ae..933545b7 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -5,5 +5,3 @@ build --cxxopt="-std=c++17"
 
 test --jobs=6
 build --enable_platform_specific_config
-
-build:linux --action_env=BAZEL_LINKLIBS="-l%:libstdc++.a -l%:libgcc.a"
diff --git a/include/collection.h b/include/collection.h
index 977a83dc..27bf7920 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -268,6 +268,8 @@ private:
     
 
 
+    Option<std::string> get_reference_field(const std::string & collection_name) const;
+
 public:
 
     enum {MAX_ARRAY_MATCHES = 5};
@@ -455,16 +457,12 @@ public:
 
     Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;
 
-    Option<std::string> get_reference_field(const std::string & collection_name) const;
-
     Option<bool> get_reference_filter_ids(const std::string & filter_query,
                                           filter_result_t& filter_result,
                                           const std::string & collection_name) const;
 
     Option<bool> validate_reference_filter(const std::string& filter_query) const;
 
-    Option<bool> validate_reference_filter(const std::string& filter_query) const;
-
     Option<nlohmann::json> get(const std::string & id) const;
 
     Option<std::string> remove(const std::string & id, bool remove_from_store = true);
diff --git a/include/field.h b/include/field.h
index 7a90fd9d..776481d2 100644
--- a/include/field.h
+++ b/include/field.h
@@ -641,11 +641,18 @@ struct reference_filter_result_t {
 struct filter_result_t {
     uint32_t count = 0;
     uint32_t* docs = nullptr;
-    reference_filter_result_t* reference_filter_result = nullptr;
+    // Collection name -> Reference filter result
+    std::map<std::string, reference_filter_result_t*> reference_filter_results;
+
+    filter_result_t() {}
+
+    filter_result_t(uint32_t count, uint32_t* docs) : count(count), docs(docs) {}
 
     ~filter_result_t() {
         delete[] docs;
-        delete[] reference_filter_result;
+        for (const auto &item: reference_filter_results) {
+            delete[] item.second;
+        }
     }
 };
 
diff --git a/include/index.h b/include/index.h
index 66f4e5de..0ce10daf 100644
--- a/include/index.h
+++ b/include/index.h
@@ -467,16 +467,28 @@ private:
 
     void numeric_not_equals_filter(num_tree_t* const num_tree,
                                    const int64_t value,
-                                   uint32_t*& ids,
-                                   size_t& ids_len) const;
+                                   const uint32_t& context_ids_length,
+                                   const uint32_t* context_ids,
+                                   size_t& ids_len,
+                                   uint32_t*& ids) const;
+
+    bool field_is_indexed(const std::string& field_name) const;
 
     Option<bool> do_filtering(filter_node_t* const root,
                               filter_result_t& result,
-                              const std::string& collection_name = "") const;
+                              const std::string& collection_name = "",
+                              const uint32_t& context_ids_length = 0,
+                              const uint32_t* context_ids = nullptr) const;
 
-    Option<bool> rearranging_recursive_filter (filter_node_t* const filter_tree_root,
-                                               filter_result_t& result,
-                                               const std::string& collection_name = "") const;
+    void aproximate_numerical_match(num_tree_t* const num_tree,
+                                    const NUM_COMPARATOR& comparator,
+                                    const int64_t& value,
+                                    const int64_t& range_end_value,
+                                    uint32_t& filter_ids_length) const;
+
+    Option<bool> rearranging_recursive_filter(filter_node_t* const filter_tree_root,
+                                              filter_result_t& result,
+                                              const std::string& collection_name = "") const;
 
     Option<bool> recursive_filter(filter_node_t* const root,
                                   filter_result_t& result,
@@ -687,7 +699,8 @@ public:
 
     Option<bool> do_reference_filtering_with_lock(filter_node_t* const filter_tree_root,
                                                   filter_result_t& filter_result,
-                                                  const std::string & reference_helper_field_name) const;
+                                                  const std::string& collection_name,
+                                                  const std::string& reference_helper_field_name) const;
 
     void refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields);
 
diff --git a/include/num_tree.h b/include/num_tree.h
index f26b72ba..280f47dd 100644
--- a/include/num_tree.h
+++ b/include/num_tree.h
@@ -11,6 +11,17 @@ class num_tree_t {
 private:
     std::map<int64_t, void*> int64map;
 
+    [[nodiscard]] bool range_inclusive_contains(const int64_t& start, const int64_t& end, const uint32_t& id) const;
+
+    [[nodiscard]] bool contains(const int64_t& value, const uint32_t& id) const {
+        if (int64map.count(value) == 0) {
+            return false;
+        }
+
+        auto ids = int64map.at(value);
+        return ids_t::contains(ids, id);
+    }
+
 public:
 
     ~num_tree_t();
@@ -19,11 +30,27 @@ public:
 
     void range_inclusive_search(int64_t start, int64_t end, uint32_t** ids, size_t& ids_len);
 
+    void approx_range_inclusive_search_count(int64_t start, int64_t end, uint32_t& ids_len);
+
+    void range_inclusive_contains(const int64_t& start, const int64_t& end,
+                                  const uint32_t& context_ids_length,
+                                  const uint32_t*& context_ids,
+                                  size_t& result_ids_len,
+                                  uint32_t*& result_ids) const;
+
     size_t get(int64_t value, std::vector<uint32_t>& geo_result_ids);
 
     void search(NUM_COMPARATOR comparator, int64_t value, uint32_t** ids, size_t& ids_len);
 
+    void approx_search_count(NUM_COMPARATOR comparator, int64_t value, uint32_t& ids_len);
+
     void remove(uint64_t value, uint32_t id);
 
     size_t size();
+
+    void contains(const NUM_COMPARATOR& comparator, const int64_t& value,
+                  const uint32_t& context_ids_length,
+                  const uint32_t*& context_ids,
+                  size_t& result_ids_len,
+                  uint32_t*& result_ids) const;
 };
\ No newline at end of file
diff --git a/include/posting.h b/include/posting.h
index 29ab8cc4..6b9e6882 100644
--- a/include/posting.h
+++ b/include/posting.h
@@ -91,7 +91,9 @@ public:
 
     static void merge(const std::vector<void*>& posting_lists, std::vector<uint32_t>& result_ids);
 
-    static void intersect(const std::vector<void*>& posting_lists, std::vector<uint32_t>& result_ids);
+    static void intersect(const std::vector<void*>& posting_lists, std::vector<uint32_t>& result_ids,
+                          const uint32_t& context_ids_length = 0,
+                          const uint32_t* context_ids = nullptr);
 
     static void get_array_token_positions(
         uint32_t id,
diff --git a/include/topster.h b/include/topster.h
index 25022423..e59ae74c 100644
--- a/include/topster.h
+++ b/include/topster.h
@@ -14,14 +14,15 @@ struct KV {
     uint64_t key{};
     uint64_t distinct_key{};
     int64_t scores[3]{};  // match score + 2 custom attributes
-    reference_filter_result_t* reference_filter_result;
+    reference_filter_result_t* reference_filter_result = nullptr;
 
     // to be used only in final aggregation
     uint64_t* query_indices = nullptr;
 
-    KV(uint16_t queryIndex, uint64_t key, uint64_t distinct_key, uint8_t match_score_index, const int64_t *scores):
+    KV(uint16_t queryIndex, uint64_t key, uint64_t distinct_key, uint8_t match_score_index, const int64_t *scores,
+       reference_filter_result_t* reference_filter_result = nullptr):
             match_score_index(match_score_index), query_index(queryIndex), array_index(0), key(key),
-            distinct_key(distinct_key) {
+            distinct_key(distinct_key), reference_filter_result(reference_filter_result) {
         this->scores[0] = scores[0];
         this->scores[1] = scores[1];
         this->scores[2] = scores[2];
diff --git a/src/collection.cpp b/src/collection.cpp
index 95190ac7..3766a94d 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -2519,8 +2519,6 @@ Option<bool> Collection::get_filter_ids(const std::string& filter_query, filter_
 }
 
 Option<std::string> Collection::get_reference_field(const std::string & collection_name) const {
-    std::shared_lock lock(mutex);
-
     std::string reference_field_name;
     for (auto const& pair: reference_fields) {
         auto reference_pair = pair.second;
@@ -2541,13 +2539,13 @@ Option<std::string> Collection::get_reference_field(const std::string & collecti
 Option<bool> Collection::get_reference_filter_ids(const std::string & filter_query,
                                                   filter_result_t& filter_result,
                                                   const std::string & collection_name) const {
+    std::shared_lock lock(mutex);
+
     auto reference_field_op = get_reference_field(collection_name);
     if (!reference_field_op.ok()) {
         return Option<bool>(reference_field_op.code(), reference_field_op.error());
     }
 
-    std::shared_lock lock(mutex);
-
     const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
     filter_node_t* filter_tree_root = nullptr;
     Option<bool> parse_op = filter::parse_filter_query(filter_query, search_schema,
@@ -2558,7 +2556,7 @@ Option<bool> Collection::get_reference_filter_ids(const std::string & filter_que
 
     // Reference helper field has the sequence id of other collection's documents.
     auto field_name = reference_field_op.get() + REFERENCE_HELPER_FIELD_SUFFIX;
-    auto filter_op = index->do_reference_filtering_with_lock(filter_tree_root, filter_result, field_name);
+    auto filter_op = index->do_reference_filtering_with_lock(filter_tree_root, filter_result, name, field_name);
     if (!filter_op.ok()) {
         return filter_op;
     }
@@ -2583,22 +2581,6 @@ Option<bool> Collection::validate_reference_filter(const std::string& filter_que
     return Option<bool>(true);
 }
 
-Option<bool> Collection::validate_reference_filter(const std::string& filter_query) const {
-    std::shared_lock lock(mutex);
-
-    const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
-    filter_node_t* filter_tree_root = nullptr;
-    Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
-                                                        store, doc_id_prefix, filter_tree_root);
-
-    if(!filter_op.ok()) {
-        return filter_op;
-    }
-
-    delete filter_tree_root;
-    return Option<bool>(true);
-}
-
 bool Collection::facet_value_to_string(const facet &a_facet, const facet_count_t &facet_count,
                                        const nlohmann::json &document, std::string &value) const {
 
diff --git a/src/field.cpp b/src/field.cpp
index 729ae55f..129c7512 100644
--- a/src/field.cpp
+++ b/src/field.cpp
@@ -418,38 +418,6 @@ Option<bool> toParseTree(std::queue<std::string>& postfix, filter_node_t*& root,
         } else {
             filter filter_exp;
 
-            // Expected value: $Collection(...)
-            bool is_referenced_filter = (expression[0] == '$' && expression[expression.size() - 1] == ')');
-            if (is_referenced_filter) {
-                size_t parenthesis_index = expression.find('(');
-
-                std::string collection_name = expression.substr(1, parenthesis_index - 1);
-                auto& cm = CollectionManager::get_instance();
-                auto collection = cm.get_collection(collection_name);
-                if (collection == nullptr) {
-                    return Option<bool>(400, "Referenced collection `" + collection_name + "` not found.");
-                }
-
-                filter_exp = {expression.substr(parenthesis_index + 1, expression.size() - parenthesis_index - 2)};
-                filter_exp.referenced_collection_name = collection_name;
-
-                auto op = collection->validate_reference_filter(filter_exp.field_name);
-                if (!op.ok()) {
-                    return Option<bool>(400, "Failed to parse reference filter on `" + collection_name +
-                                                "` collection: " + op.error());
-                }
-            } else {
-                Option<bool> toFilter_op = toFilter(expression, filter_exp, search_schema, store, doc_id_prefix);
-                if (!toFilter_op.ok()) {
-		            while(!nodeStack.empty()) {
-                        auto filterNode = nodeStack.top();
-                        delete filterNode;
-                        nodeStack.pop();
-                    }
-                    return toFilter_op;
-                }
-            }
-
             // Expected value: $Collection(...)
             bool is_referenced_filter = (expression[0] == '$' && expression[expression.size() - 1] == ')');
             if (is_referenced_filter) {
diff --git a/src/index.cpp b/src/index.cpp
index 0379ae43..0891968f 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1451,11 +1451,18 @@ void Index::search_candidates(const uint8_t & field_id, bool field_is_array,
 
 void Index::numeric_not_equals_filter(num_tree_t* const num_tree,
                                       const int64_t value,
-                                      uint32_t*& ids,
-                                      size_t& ids_len) const {
+                                      const uint32_t& context_ids_length,
+                                      const uint32_t* context_ids,
+                                      size_t& ids_len,
+                                      uint32_t*& ids) const {
     uint32_t* to_exclude_ids = nullptr;
     size_t to_exclude_ids_len = 0;
-    num_tree->search(EQUALS, value, &to_exclude_ids, to_exclude_ids_len);
+
+    if (context_ids_length != 0) {
+        num_tree->contains(EQUALS, value, context_ids_length, context_ids, to_exclude_ids_len, to_exclude_ids);
+    } else {
+        num_tree->search(EQUALS, value, &to_exclude_ids, to_exclude_ids_len);
+    }
 
     auto all_ids = seq_ids->uncompress();
     auto all_ids_size = seq_ids->num_ids();
@@ -1470,17 +1477,25 @@ void Index::numeric_not_equals_filter(num_tree_t* const num_tree,
     delete[] to_exclude_ids;
 
     uint32_t* out = nullptr;
-    ids_len = ArrayUtils::or_scalar(ids, ids_len,
-                                    to_include_ids, to_include_ids_len, &out);
+    ids_len = ArrayUtils::or_scalar(ids, ids_len, to_include_ids, to_include_ids_len, &out);
+
     delete[] ids;
     delete[] to_include_ids;
 
     ids = out;
 }
 
+bool Index::field_is_indexed(const std::string& field_name) const {
+    return search_index.count(field_name) != 0 ||
+    numerical_index.count(field_name) != 0 ||
+    geopoint_index.count(field_name) != 0;
+}
+
 Option<bool> Index::do_filtering(filter_node_t* const root,
                                  filter_result_t& result,
-                                 const std::string& collection_name) const {
+                                 const std::string& collection_name,
+                                 const uint32_t& context_ids_length,
+                                 const uint32_t* context_ids) const {
     // auto begin = std::chrono::high_resolution_clock::now();
     const filter a_filter = root->filter_exp;
 
@@ -1492,13 +1507,46 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
         if (collection == nullptr) {
             return Option<bool>(400, "Referenced collection `" + a_filter.referenced_collection_name + "` not found.");
         }
+
+        filter_result_t reference_filter_result;
         auto reference_filter_op = collection->get_reference_filter_ids(a_filter.field_name,
-                                                                        result,
+                                                                        reference_filter_result,
                                                                         collection_name);
         if (!reference_filter_op.ok()) {
             return reference_filter_op;
         }
 
+        if (context_ids_length != 0) {
+            std::vector<uint32_t> include_indexes;
+            include_indexes.reserve(std::min(context_ids_length, reference_filter_result.count));
+
+            size_t context_index = 0, reference_result_index = 0;
+            while (context_index < context_ids_length && reference_result_index < reference_filter_result.count) {
+                if (context_ids[context_index] == reference_filter_result.docs[reference_result_index]) {
+                    include_indexes.push_back(reference_result_index);
+                    context_index++;
+                    reference_result_index++;
+                } else if (context_ids[context_index] < reference_filter_result.docs[reference_result_index]) {
+                    context_index++;
+                } else {
+                    reference_result_index++;
+                }
+            }
+
+            result.count = include_indexes.size();
+            result.docs = new uint32_t[include_indexes.size()];
+            auto& result_references = result.reference_filter_results[a_filter.referenced_collection_name];
+            result_references = new reference_filter_result_t[include_indexes.size()];
+
+            for (uint32_t i = 0; i < include_indexes.size(); i++) {
+                result.docs[i] = reference_filter_result.docs[include_indexes[i]];
+                result_references[i] = reference_filter_result.reference_filter_results[a_filter.referenced_collection_name][include_indexes[i]];
+            }
+
+            return Option(true);
+        }
+
+        result = reference_filter_result;
         return Option(true);
     }
 
@@ -1511,18 +1559,26 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
 
         std::sort(result_ids.begin(), result_ids.end());
 
-        result.docs = new uint32[result_ids.size()];
-        std::copy(result_ids.begin(), result_ids.end(), result.docs);
-        result.count = result_ids.size();
+        auto result_array = new uint32[result_ids.size()];
+        std::copy(result_ids.begin(), result_ids.end(), result_array);
 
+        if (context_ids_length != 0) {
+            uint32_t* out = nullptr;
+            result.count = ArrayUtils::and_scalar(context_ids, context_ids_length,
+                                                  result_array, result_ids.size(), &out);
+
+            delete[] result_array;
+
+            result.docs = out;
+            return Option(true);
+        }
+
+        result.docs = result_array;
+        result.count = result_ids.size();
         return Option(true);
     }
 
-    bool has_search_index = search_index.count(a_filter.field_name) != 0 ||
-                            numerical_index.count(a_filter.field_name) != 0 ||
-                            geopoint_index.count(a_filter.field_name) != 0;
-
-    if (!has_search_index) {
+    if (!field_is_indexed(a_filter.field_name)) {
         return Option(true);
     }
 
@@ -1540,13 +1596,25 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
 
             if(a_filter.comparators[fi] == RANGE_INCLUSIVE && fi+1 < a_filter.values.size()) {
                 const std::string& next_filter_value = a_filter.values[fi + 1];
-                int64_t range_end_value = (int64_t)std::stol(next_filter_value);
-                num_tree->range_inclusive_search(value, range_end_value, &result_ids, result_ids_len);
+                auto const range_end_value = (int64_t)std::stol(next_filter_value);
+
+                if (context_ids_length != 0) {
+                    num_tree->range_inclusive_contains(value, range_end_value, context_ids_length, context_ids,
+                                                       result_ids_len, result_ids);
+                } else {
+                    num_tree->range_inclusive_search(value, range_end_value, &result_ids, result_ids_len);
+                }
+
                 fi++;
             } else if (a_filter.comparators[fi] == NOT_EQUALS) {
-                numeric_not_equals_filter(num_tree, value, result_ids, result_ids_len);
+                numeric_not_equals_filter(num_tree, value, context_ids_length, context_ids, result_ids_len, result_ids);
             } else {
-                num_tree->search(a_filter.comparators[fi], value, &result_ids, result_ids_len);
+                if (context_ids_length != 0) {
+                    num_tree->contains(a_filter.comparators[fi], value,
+                                       context_ids_length, context_ids, result_ids_len, result_ids);
+                } else {
+                    num_tree->search(a_filter.comparators[fi], value, &result_ids, result_ids_len);
+                }
             }
         }
     } else if (f.is_float()) {
@@ -1560,12 +1628,25 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
             if(a_filter.comparators[fi] == RANGE_INCLUSIVE && fi+1 < a_filter.values.size()) {
                 const std::string& next_filter_value = a_filter.values[fi+1];
                 int64_t range_end_value = float_to_int64_t((float) std::atof(next_filter_value.c_str()));
-                num_tree->range_inclusive_search(float_int64, range_end_value, &result_ids, result_ids_len);
+
+                if (context_ids_length != 0) {
+                    num_tree->range_inclusive_contains(float_int64, range_end_value, context_ids_length, context_ids,
+                                                       result_ids_len, result_ids);
+                } else {
+                    num_tree->range_inclusive_search(float_int64, range_end_value, &result_ids, result_ids_len);
+                }
+
                 fi++;
             } else if (a_filter.comparators[fi] == NOT_EQUALS) {
-                numeric_not_equals_filter(num_tree, value, result_ids, result_ids_len);
+                numeric_not_equals_filter(num_tree, float_int64,
+                                          context_ids_length, context_ids, result_ids_len, result_ids);
             } else {
-                num_tree->search(a_filter.comparators[fi], float_int64, &result_ids, result_ids_len);
+                if (context_ids_length != 0) {
+                    num_tree->contains(a_filter.comparators[fi], float_int64,
+                                       context_ids_length, context_ids, result_ids_len, result_ids);
+                } else {
+                    num_tree->search(a_filter.comparators[fi], float_int64, &result_ids, result_ids_len);
+                }
             }
         }
     } else if (f.is_bool()) {
@@ -1575,9 +1656,15 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
         for (const std::string& filter_value : a_filter.values) {
             int64_t bool_int64 = (filter_value == "1") ? 1 : 0;
             if (a_filter.comparators[value_index] == NOT_EQUALS) {
-                numeric_not_equals_filter(num_tree, bool_int64, result_ids, result_ids_len);
+                numeric_not_equals_filter(num_tree, bool_int64,
+                                          context_ids_length, context_ids, result_ids_len, result_ids);
             } else {
-                num_tree->search(a_filter.comparators[value_index], bool_int64, &result_ids, result_ids_len);
+                if (context_ids_length != 0) {
+                    num_tree->contains(a_filter.comparators[value_index], bool_int64,
+                                       context_ids_length, context_ids, result_ids_len, result_ids);
+                } else {
+                    num_tree->search(a_filter.comparators[value_index], bool_int64, &result_ids, result_ids_len);
+                }
             }
 
             value_index++;
@@ -1652,6 +1739,14 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
             // `geo_result_ids` will contain all IDs that are within approximately within query radius
             // we still need to do another round of exact filtering on them
 
+            if (context_ids_length != 0) {
+                uint32_t *out = nullptr;
+                uint32_t count = ArrayUtils::and_scalar(context_ids, context_ids_length,
+                                                        &geo_result_ids[0], geo_result_ids.size(), &out);
+
+                geo_result_ids = std::vector<uint32_t>(out, out + count);
+            }
+
             std::vector<uint32_t> exact_geo_result_ids;
 
             if (f.is_single_geopoint()) {
@@ -1739,7 +1834,7 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
             if(a_filter.comparators[0] == EQUALS || a_filter.comparators[0] == NOT_EQUALS) {
                 // needs intersection + exact matching (unlike CONTAINS)
                 std::vector<uint32_t> result_id_vec;
-                posting_t::intersect(posting_lists, result_id_vec);
+                posting_t::intersect(posting_lists, result_id_vec, context_ids_length, context_ids);
 
                 if (result_id_vec.empty()) {
                     continue;
@@ -1763,7 +1858,7 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
             } else {
                 // CONTAINS
                 size_t before_size = f_id_buff.size();
-                posting_t::intersect(posting_lists, f_id_buff);
+                posting_t::intersect(posting_lists, f_id_buff, context_ids_length, context_ids);
                 if (f_id_buff.size() == before_size) {
                     continue;
                 }
@@ -1811,6 +1906,17 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
 
         result_ids = to_include_ids;
         result_ids_len = to_include_ids_len;
+
+        if (context_ids_length != 0) {
+            uint32_t *out = nullptr;
+            result.count = ArrayUtils::and_scalar(context_ids, context_ids_length,
+                                                  result_ids, result_ids_len, &out);
+
+            delete[] result_ids;
+
+            result.docs = out;
+            return Option(true);
+        }
     }
 
     result.docs = result_ids;
@@ -1824,6 +1930,28 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
     LOG(INFO) << "Time taken for filtering: " << timeMillis << "ms";*/
 }
 
+// Adds to `filter_ids_length` an estimate of how many ids of `num_tree` satisfy `comparator`
+// against `value`. `range_end_value` is only read when the comparator is RANGE_INCLUSIVE.
+// The estimate is accumulated into `filter_ids_length`; it is never reset here.
+void Index::aproximate_numerical_match(num_tree_t* const num_tree,
+                                       const NUM_COMPARATOR& comparator,
+                                       const int64_t& value,
+                                       const int64_t& range_end_value,
+                                       uint32_t& filter_ids_length) const {
+    if (comparator == RANGE_INCLUSIVE) {
+        num_tree->approx_range_inclusive_search_count(value, range_end_value, filter_ids_length);
+    } else if (comparator == NOT_EQUALS) {
+        // Estimate = total reported by seq_ids->num_ids() minus the ids whose value equals `value`.
+        uint32_t equal_ids_count = 0;
+        num_tree->approx_search_count(EQUALS, value, equal_ids_count);
+
+        filter_ids_length += (seq_ids->num_ids() - equal_ids_count);
+    } else {
+        // Every remaining comparator is resolved by the tree itself.
+        num_tree->approx_search_count(comparator, value, filter_ids_length);
+    }
+}
+
 Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
                                           uint32_t& filter_ids_length,
                                           const std::string& collection_name) const {
@@ -1861,13 +1989,94 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
         return Option(true);
     }
 
-    filter_result_t result;
-    auto filter_op = do_filtering(root, result, collection_name);
-    if (!filter_op.ok()) {
-        return filter_op;
+    auto a_filter = root->filter_exp;
+
+    if (a_filter.field_name == "id") {
+        filter_ids_length = a_filter.values.size();
+        return Option(true);
+    }
+
+    if (!field_is_indexed(a_filter.field_name)) {
+        return Option(true);
+    }
+
+    field f = search_schema.at(a_filter.field_name);
+
+    if (f.is_integer()) {
+        auto num_tree = numerical_index.at(f.name);
+
+        for (size_t fi = 0; fi < a_filter.values.size(); fi++) {
+            const std::string& filter_value = a_filter.values[fi];
+            auto const value = (int64_t)std::stol(filter_value);
+
+            if (a_filter.comparators[fi] == RANGE_INCLUSIVE && fi+1 < a_filter.values.size()) {
+                const std::string& next_filter_value = a_filter.values[fi + 1];
+                auto const range_end_value = (int64_t)std::stol(next_filter_value);
+
+                aproximate_numerical_match(num_tree, a_filter.comparators[fi], value, range_end_value,
+                                           filter_ids_length);
+                fi++;
+            } else {
+                aproximate_numerical_match(num_tree, a_filter.comparators[fi], value, 0, filter_ids_length);
+            }
+        }
+    } else if (f.is_float()) {
+        auto num_tree = numerical_index.at(a_filter.field_name);
+
+        for (size_t fi = 0; fi < a_filter.values.size(); fi++) {
+            const std::string& filter_value = a_filter.values[fi];
+            float value = (float)std::atof(filter_value.c_str());
+            int64_t float_int64 = float_to_int64_t(value);
+
+            if (a_filter.comparators[fi] == RANGE_INCLUSIVE && fi+1 < a_filter.values.size()) {
+                const std::string& next_filter_value = a_filter.values[fi + 1];
+                auto const range_end_value = float_to_int64_t((float) std::atof(next_filter_value.c_str()));
+
+                aproximate_numerical_match(num_tree, a_filter.comparators[fi], float_int64, range_end_value,
+                                           filter_ids_length);
+                fi++;
+            } else {
+                aproximate_numerical_match(num_tree, a_filter.comparators[fi], float_int64, 0, filter_ids_length);
+            }
+        }
+    } else if (f.is_bool()) {
+        auto num_tree = numerical_index.at(a_filter.field_name);
+
+        size_t value_index = 0;
+        for (const std::string& filter_value : a_filter.values) {
+            int64_t bool_int64 = (filter_value == "1") ? 1 : 0;
+
+            aproximate_numerical_match(num_tree, a_filter.comparators[value_index], bool_int64, 0, filter_ids_length);
+            value_index++;
+        }
+    } else if (f.is_geopoint()) {
+        filter_ids_length = 100;
+    } else if (f.is_string()) {
+        art_tree* t = search_index.at(a_filter.field_name);
+
+        for (const std::string& filter_value : a_filter.values) {
+            Tokenizer tokenizer(filter_value, true, false, f.locale, symbols_to_index, token_separators);
+
+            std::string str_token;
+            size_t token_index = 0;
+
+            while (tokenizer.next(str_token, token_index)) {
+                auto const leaf = (art_leaf *) art_search(t, (const unsigned char*) str_token.c_str(),
+                                                         str_token.length()+1);
+                if (leaf == nullptr) {
+                    continue;
+                }
+
+                filter_ids_length += posting_t::num_ids(leaf->values);
+            }
+        }
+    }
+
+    if (a_filter.apply_not_equals) {
+        auto all_ids_size = seq_ids->num_ids();
+        filter_ids_length = (all_ids_size - filter_ids_length);
     }
 
-    filter_ids_length = result.count;
     return Option(true);
 }
 
@@ -1884,19 +2093,23 @@ Option<bool> Index::rearranging_recursive_filter(filter_node_t* const filter_tre
 }
 
 void copy_reference_ids(filter_result_t& from, filter_result_t& to) {
-    if (to.count > 0 && from.reference_filter_result != nullptr && from.reference_filter_result->count > 0) {
-        to.reference_filter_result = new reference_filter_result_t[to.count];
+    if (to.count > 0 && !from.reference_filter_results.empty()) {
+        for (const auto &item: from.reference_filter_results) {
+            auto& from_reference_result = from.reference_filter_results[item.first];
+            auto& to_reference_result = to.reference_filter_results[item.first];
+            to_reference_result = new reference_filter_result_t[to.count];
 
-        size_t to_index = 0, from_index = 0;
-        while (to_index < to.count && from_index < from.count) {
-            if (to.docs[to_index] == from.docs[from_index]) {
-                to.reference_filter_result[to_index] = from.reference_filter_result[from_index];
-                to_index++;
-                from_index++;
-            } else if (to.docs[to_index] < from.docs[from_index]) {
-                to_index++;
-            } else {
-                from_index++;
+            size_t to_index = 0, from_index = 0;
+            while (to_index < to.count && from_index < from.count) {
+                if (to.docs[to_index] == from.docs[from_index]) {
+                    to_reference_result[to_index] = from_reference_result[from_index];
+                    to_index++;
+                    from_index++;
+                } else if (to.docs[to_index] < from.docs[from_index]) {
+                    to_index++;
+                } else {
+                    from_index++;
+                }
             }
         }
     }
@@ -1938,8 +2151,8 @@ Option<bool> Index::recursive_filter(filter_node_t* const root,
         }
 
         result.docs = filtered_results;
-        if (l_result.reference_filter_result != nullptr || r_result.reference_filter_result != nullptr) {
-            copy_reference_ids(l_result.reference_filter_result != nullptr ? l_result : r_result, result);
+        if (!l_result.reference_filter_results.empty() || !r_result.reference_filter_results.empty()) {
+            copy_reference_ids(!l_result.reference_filter_results.empty() ? l_result : r_result, result);
         }
 
         return Option(true);
@@ -1982,7 +2195,8 @@ Option<bool> Index::do_filtering_with_lock(filter_node_t* const filter_tree_root
 
 Option<bool> Index::do_reference_filtering_with_lock(filter_node_t* const filter_tree_root,
                                                      filter_result_t& filter_result,
-                                                     const std::string & reference_helper_field_name) const {
+                                                     const std::string& collection_name,
+                                                     const std::string& reference_helper_field_name) const {
     std::shared_lock lock(mutex);
 
     filter_result_t reference_filter_result;
@@ -2002,15 +2216,17 @@ Option<bool> Index::do_reference_filtering_with_lock(filter_node_t* const filter
 
     filter_result.count = reference_map.size();
     filter_result.docs = new uint32_t[reference_map.size()];
-    filter_result.reference_filter_result = new reference_filter_result_t[reference_map.size()];
+    filter_result.reference_filter_results[collection_name] = new reference_filter_result_t[reference_map.size()];
 
     size_t doc_index = 0;
     for (auto &item: reference_map) {
         filter_result.docs[doc_index] = item.first;
 
-        filter_result.reference_filter_result[doc_index].count = item.second.size();
-        filter_result.reference_filter_result[doc_index].docs = new uint32_t[item.second.size()];
-        std::copy(item.second.begin(), item.second.end(), filter_result.reference_filter_result[doc_index].docs);
+        auto& reference_result = filter_result.reference_filter_results[collection_name][doc_index];
+        reference_result.count = item.second.size();
+        reference_result.docs = new uint32_t[item.second.size()];
+        std::copy(item.second.begin(), item.second.end(), reference_result.docs);
+
         doc_index++;
     }
 
@@ -2080,7 +2296,7 @@ void Index::collate_included_ids(const std::vector<token_t>& q_included_tokens,
             scores[1] = int64_t(1);
             scores[2] = int64_t(1);
 
-            KV kv(searched_queries.size(), seq_id, distinct_id, 0, scores);
+            KV kv(searched_queries.size(), seq_id, distinct_id, 0, scores, nullptr);
             curated_topster->add(&kv);
         }
     }
@@ -2582,7 +2798,8 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
                 int64_t match_score_index = -1;
 
                 result_ids.push_back(seq_id);
-                KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
+
+                KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores, nullptr);
                 int ret = topster->add(&kv);
 
                 if(group_limit != 0 && ret < 2) {
@@ -2681,7 +2898,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
 
                 //LOG(INFO) << "SEQ_ID: " << seq_id << ", score: " << dist_label.first;
 
-                KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
+                KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores, nullptr);
                 int ret = topster->add(&kv);
 
                 if(group_limit != 0 && ret < 2) {
diff --git a/src/num_tree.cpp b/src/num_tree.cpp
index c8ce253c..5a1b95d3 100644
--- a/src/num_tree.cpp
+++ b/src/num_tree.cpp
@@ -43,6 +43,61 @@ void num_tree_t::range_inclusive_search(int64_t start, int64_t end, uint32_t** i
     *ids = out;
 }
 
+// Adds to `ids_len` the number of ids stored under every key in the inclusive range [start, end].
+// `ids_len` is only ever incremented, so the caller decides the starting value.
+void num_tree_t::approx_range_inclusive_search_count(int64_t start, int64_t end, uint32_t& ids_len) {
+    if (int64map.empty()) {
+        return;
+    }
+
+    // lower_bound() positions the scan on the first key that is >= start
+    auto key_it = int64map.lower_bound(start);
+    for (; key_it != int64map.end() && key_it->first <= end; ++key_it) {
+        ids_len += ids_t::num_ids(key_it->second);
+    }
+}
+
+// Returns true when `id` is stored under at least one key in the inclusive range [start, end].
+bool num_tree_t::range_inclusive_contains(const int64_t& start, const int64_t& end, const uint32_t& id) const {
+    if (int64map.empty()) {
+        return false;
+    }
+
+    // lower_bound() yields the first key >= start. The iterator has to advance on every pass:
+    // a loop without the increment never terminates once the first key in range lacks `id`.
+    for (auto it = int64map.lower_bound(start); it != int64map.end() && it->first <= end; ++it) {
+        if (ids_t::contains(it->second, id)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Keeps the `context_ids` that hold a value in the inclusive range [start, end] and ORs them into
+// `result_ids`: the old `result_ids` array is freed and replaced by the array or_scalar() outputs.
+void num_tree_t::range_inclusive_contains(const int64_t& start, const int64_t& end,
+                                          const uint32_t& context_ids_length,
+                                          const uint32_t*& context_ids,
+                                          size_t& result_ids_len,
+                                          uint32_t*& result_ids) const {
+    if (int64map.empty()) {
+        return;
+    }
+    std::vector<uint32_t> consolidated_ids;
+    consolidated_ids.reserve(context_ids_length);
+    for (uint32_t i = 0; i < context_ids_length; i++) {
+        if (range_inclusive_contains(start, end, context_ids[i])) {
+            consolidated_ids.push_back(context_ids[i]);
+        }
+    }
+    // data() is well defined on an empty vector; `&consolidated_ids[0]` is not.
+    uint32_t *out = nullptr;
+    result_ids_len = ArrayUtils::or_scalar(consolidated_ids.data(), consolidated_ids.size(),
+                                           result_ids, result_ids_len, &out);
+    delete [] result_ids;
+    result_ids = out;
+}
+
 size_t num_tree_t::get(int64_t value, std::vector<uint32_t>& geo_result_ids) {
     const auto& it = int64map.find(value);
     if(it == int64map.end()) {
@@ -132,6 +187,54 @@ void num_tree_t::search(NUM_COMPARATOR comparator, int64_t value, uint32_t** ids
     }
 }
 
+// Adds to `ids_len` the number of ids stored under every key that satisfies `comparator` against
+// `value`. Handles EQUALS, GREATER_THAN, GREATER_THAN_EQUALS, LESS_THAN and LESS_THAN_EQUALS;
+// any other comparator leaves `ids_len` untouched.
+// The count is added on top of the incoming `ids_len` (it is never reset here).
+void num_tree_t::approx_search_count(NUM_COMPARATOR comparator, int64_t value, uint32_t& ids_len) {
+    if (int64map.empty()) {
+        return;
+    }
+
+    if (comparator == EQUALS) {
+        const auto& match = int64map.find(value);
+        if (match != int64map.end()) {
+            ids_len += ids_t::num_ids(match->second);
+        }
+        return;
+    }
+
+    const bool wants_greater = (comparator == GREATER_THAN || comparator == GREATER_THAN_EQUALS);
+    const bool wants_less = (comparator == LESS_THAN || comparator == LESS_THAN_EQUALS);
+    if (!wants_greater && !wants_less) {
+        return;
+    }
+
+    // first key that is >= value, or end() when every key is smaller than value
+    auto boundary = int64map.lower_bound(value);
+    const bool boundary_is_value = (boundary != int64map.end() && boundary->first == value);
+
+    if (wants_greater) {
+        // GREATER_THAN leaves out the key equal to `value`
+        if (comparator == GREATER_THAN && boundary_is_value) {
+            ++boundary;
+        }
+        for (; boundary != int64map.end(); ++boundary) {
+            ids_len += ids_t::num_ids(boundary->second);
+        }
+        return;
+    }
+
+    // every key before the boundary is strictly smaller than `value`
+    for (auto it = int64map.begin(); it != boundary; ++it) {
+        ids_len += ids_t::num_ids(it->second);
+    }
+    // LESS_THAN_EQUALS also counts the key equal to `value`, when present
+    if (comparator == LESS_THAN_EQUALS && boundary_is_value) {
+        ids_len += ids_t::num_ids(boundary->second);
+    }
+}
+
 void num_tree_t::remove(uint64_t value, uint32_t id) {
     if(int64map.count(value) != 0) {
         void* arr = int64map[value];
@@ -146,6 +249,75 @@ void num_tree_t::remove(uint64_t value, uint32_t id) {
     }
 }
 
+// Collects the ids of `context_ids` that hold at least one value satisfying `comparator` against
+// `value` and ORs them into `result_ids`; the old `result_ids` array is freed and replaced by the
+// array or_scalar() outputs. Comparators other than EQUALS, GREATER_THAN(_EQUALS) and
+// LESS_THAN(_EQUALS) match nothing.
+void num_tree_t::contains(const NUM_COMPARATOR& comparator, const int64_t& value,
+                          const uint32_t& context_ids_length,
+                          const uint32_t*& context_ids,
+                          size_t& result_ids_len,
+                          uint32_t*& result_ids) const {
+    if (int64map.empty()) {
+        return;
+    }
+
+    const bool wants_greater = (comparator == GREATER_THAN || comparator == GREATER_THAN_EQUALS);
+    const bool wants_less = (comparator == LESS_THAN || comparator == LESS_THAN_EQUALS);
+    // [range_begin, range_end) is the run of keys accepted by a range comparator. It depends only
+    // on `comparator` and `value`, so it is resolved once instead of once per context id.
+    auto range_begin = int64map.end();
+    auto range_end = int64map.end();
+    if (wants_greater || wants_less) {
+        // first key that is >= value, or end() when every key is smaller than value
+        const auto boundary = int64map.lower_bound(value);
+        const bool boundary_is_value = (boundary != int64map.end() && boundary->first == value);
+        if (wants_greater) {
+            range_begin = boundary;
+            // GREATER_THAN leaves out the key equal to `value`
+            if (comparator == GREATER_THAN && boundary_is_value) {
+                ++range_begin;
+            }
+        } else {
+            range_begin = int64map.begin();
+            range_end = boundary;
+            // LESS_THAN_EQUALS also accepts the key equal to `value`
+            if (comparator == LESS_THAN_EQUALS && boundary_is_value) {
+                ++range_end;
+            }
+        }
+    }
+
+    std::vector<uint32_t> consolidated_ids;
+    consolidated_ids.reserve(context_ids_length);
+    for (uint32_t i = 0; i < context_ids_length; i++) {
+        const uint32_t id = context_ids[i];
+        if (comparator == EQUALS) {
+            if (contains(value, id)) {
+                consolidated_ids.push_back(id);
+            }
+            continue;
+        }
+        // Stop at the first accepted key that holds this id. The `break` leaves only this inner
+        // loop: breaking out of the outer loop would silently drop every later context id.
+        for (auto it = range_begin; it != range_end; ++it) {
+            if (contains(it->first, id)) {
+                consolidated_ids.push_back(id);
+                break;
+            }
+        }
+    }
+
+    gfx::timsort(consolidated_ids.begin(), consolidated_ids.end());
+    consolidated_ids.erase(unique(consolidated_ids.begin(), consolidated_ids.end()), consolidated_ids.end());
+    // data() is well defined on an empty vector; `&consolidated_ids[0]` is not.
+    uint32_t *out = nullptr;
+    result_ids_len = ArrayUtils::or_scalar(consolidated_ids.data(), consolidated_ids.size(),
+                                           result_ids, result_ids_len, &out);
+    delete[] result_ids;
+    result_ids = out;
+}
+
 size_t num_tree_t::size() {
     return int64map.size();
 }
diff --git a/src/posting.cpp b/src/posting.cpp
index 8b72f078..05b5b061 100644
--- a/src/posting.cpp
+++ b/src/posting.cpp
@@ -386,7 +386,32 @@ void posting_t::merge(const std::vector<void*>& raw_posting_lists, std::vector<u
     }
 }
 
-void posting_t::intersect(const std::vector<void*>& raw_posting_lists, std::vector<uint32_t>& result_ids) {
+void posting_t::intersect(const std::vector<void*>& raw_posting_lists, std::vector<uint32_t>& result_ids,
+                          const uint32_t& context_ids_length,
+                          const uint32_t* context_ids) {
+    if (context_ids_length != 0) {
+        if (raw_posting_lists.empty()) {
+            return;
+        }
+
+        for (uint32_t i = 0; i < context_ids_length; i++) {
+            bool is_present = true;
+
+            for (auto const& raw_posting_list: raw_posting_lists) {
+                if (!contains(raw_posting_list, context_ids[i])) {
+                    is_present = false;
+                    break;
+                }
+            }
+
+            if (is_present) {
+                result_ids.push_back(context_ids[i]);
+            }
+        }
+
+        return;
+    }
+
     // we will have to convert the compact posting list (if any) to full form
     std::vector<posting_list_t*> plists;
     std::vector<posting_list_t*> expanded_plists;
diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index c8ee0cfd..f302d3dc 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -651,11 +651,11 @@ TEST_F(CollectionJoinTest, IncludeFieldsByReference_SingleMatch) {
     ASSERT_FALSE(search_op.ok());
     ASSERT_EQ("Invalid reference in include_fields, expected `$CollectionName(fieldA, ...)`.", search_op.error());
 
-    req_params["include_fields"] = "$foo(bar)";
-    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
-    ASSERT_FALSE(search_op.ok());
-    ASSERT_EQ("Referenced collection `foo` not found.", search_op.error());
-
+//    req_params["include_fields"] = "$foo(bar)";
+//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+//    ASSERT_FALSE(search_op.ok());
+//    ASSERT_EQ("Referenced collection `foo` not found.", search_op.error());
+//
 //    req_params["include_fields"] = "$Customers(bar)";
 //    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
 //    ASSERT_TRUE(search_op.ok());

From 665eee9b031514f26b814a5c472ab7ad414923f4 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 3 Mar 2023 11:46:40 +0530
Subject: [PATCH 07/51] Fix invalid read.

---
 include/field.h | 14 ++++++++++++++
 src/index.cpp   |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/include/field.h b/include/field.h
index 776481d2..18a1d4b7 100644
--- a/include/field.h
+++ b/include/field.h
@@ -648,6 +648,20 @@ struct filter_result_t {
 
     filter_result_t(uint32_t count, uint32_t* docs) : count(count), docs(docs) {}
 
+    // Move assignment: contents are swapped so `obj`'s destructor releases what this object held.
+    filter_result_t& operator=(filter_result_t&& obj) noexcept {
+        if (&obj != this) {
+            const auto previous_count = count;
+            auto* const previous_docs = docs;
+            count = obj.count;
+            docs = obj.docs;
+            obj.count = previous_count;  // overwriting instead of swapping would leak the old array
+            obj.docs = previous_docs;
+            reference_filter_results.swap(obj.reference_filter_results);  // swapped, not copied
+        }
+        return *this;
+    }
+
     ~filter_result_t() {
         delete[] docs;
         for (const auto &item: reference_filter_results) {
diff --git a/src/index.cpp b/src/index.cpp
index 0891968f..f9ba9f35 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1546,7 +1546,7 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
             return Option(true);
         }
 
-        result = reference_filter_result;
+        result = std::move(reference_filter_result);
         return Option(true);
     }
 

From 61bdcd91fd2ba43c4393a7bf898456eb790135fd Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Tue, 17 Jan 2023 14:08:39 +0530
Subject: [PATCH 08/51] Abstract `foo_sequence_id` field from user.

---
 include/field.h                  |  7 +++++++
 src/collection_manager.cpp       |  4 ++++
 test/collection_manager_test.cpp | 17 ++---------------
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/include/field.h b/include/field.h
index 18a1d4b7..44de4637 100644
--- a/include/field.h
+++ b/include/field.h
@@ -11,6 +11,7 @@
 #include <tsl/htrie_map.h>
 #include "json.hpp"
 #include "text_embedder_manager.h"
+#include <regex>
 
 namespace field_types {
     // first field value indexed will determine the type
@@ -284,11 +285,17 @@ struct field {
                                               const std::string & default_sorting_field,
                                               nlohmann::json& fields_json) {
         bool found_default_sorting_field = false;
+        const std::regex sequence_id_pattern(".*_sequence_id$");
 
         // Check for duplicates in field names
         std::map<std::string, std::vector<const field*>> unique_fields;
 
         for(const field & field: fields) {
+            if (std::regex_match(field.name, sequence_id_pattern)) {
+                // Don't add foo_sequence_id field.
+                continue;
+            }
+
             unique_fields[field.name].push_back(&field);
 
             if(field.name == "id") {
diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp
index 0475dfbe..96a187b9 100644
--- a/src/collection_manager.cpp
+++ b/src/collection_manager.cpp
@@ -89,6 +89,10 @@ Collection* CollectionManager::init_collection(const nlohmann::json & collection
         }
 
         fields.push_back(f);
+
+        if (!f.reference.empty()) {
+            fields.emplace_back(field(f.name + "_sequence_id", "string", false, f.optional, true));
+        }
     }
 
     std::string default_sorting_field = collection_meta[Collection::COLLECTION_DEFAULT_SORTING_FIELD_KEY].get<std::string>();
diff --git a/test/collection_manager_test.cpp b/test/collection_manager_test.cpp
index 38c7e014..cdbd482e 100644
--- a/test/collection_manager_test.cpp
+++ b/test/collection_manager_test.cpp
@@ -227,17 +227,6 @@ TEST_F(CollectionManagerTest, CollectionCreation) {
               "sort":false,
               "type":"string",
               "reference":"Products.product_id"
-            },
-            {
-              "facet":false,
-              "index":true,
-              "infix":false,
-              "locale":"",
-              "name":"product_id_sequence_id",
-              "nested":false,
-              "optional":true,
-              "sort":true,
-              "type":"int64"
             }
           ],
           "id":0,
@@ -475,11 +464,9 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
     ASSERT_EQ(0, collection1->get_collection_id());
     ASSERT_EQ(18, collection1->get_next_seq_id());
     ASSERT_EQ(facet_fields_expected, collection1->get_facet_fields());
-    // product_id_sequence_id is also included
-    ASSERT_EQ(3, collection1->get_sort_fields().size());
+    ASSERT_EQ(2, collection1->get_sort_fields().size());
     ASSERT_EQ("location", collection1->get_sort_fields()[0].name);
-    ASSERT_EQ("product_id_sequence_id", collection1->get_sort_fields()[1].name);
-    ASSERT_EQ("points", collection1->get_sort_fields()[2].name);
+    ASSERT_EQ("points", collection1->get_sort_fields()[1].name);
     ASSERT_EQ(schema.size(), collection1->get_schema().size());
     ASSERT_EQ("points", collection1->get_default_sorting_field());
 

From 0fb5e0d2a27508c20ece66d33f6b8c35905d88d1 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 19 Jan 2023 11:25:43 +0530
Subject: [PATCH 09/51] Serialize sequence id.

---
 test/collection_join_test.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index f302d3dc..9db121aa 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -265,7 +265,6 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
         }
         ASSERT_TRUE(add_op.ok());
     }
-
     collectionManager.drop_collection("Customers");
     customers_schema_json =
             R"({

From e27cc6e34ca600f91db9a948312b8cf11ec51979 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 19 Jan 2023 11:27:52 +0530
Subject: [PATCH 10/51] Store `foo_sequence_id` in collection's meta-data.

---
 include/field.h                  |  7 -------
 src/collection_manager.cpp       |  4 ----
 test/collection_manager_test.cpp | 17 +++++++++++++++--
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/include/field.h b/include/field.h
index 44de4637..18a1d4b7 100644
--- a/include/field.h
+++ b/include/field.h
@@ -11,7 +11,6 @@
 #include <tsl/htrie_map.h>
 #include "json.hpp"
 #include "text_embedder_manager.h"
-#include <regex>
 
 namespace field_types {
     // first field value indexed will determine the type
@@ -285,17 +284,11 @@ struct field {
                                               const std::string & default_sorting_field,
                                               nlohmann::json& fields_json) {
         bool found_default_sorting_field = false;
-        const std::regex sequence_id_pattern(".*_sequence_id$");
 
         // Check for duplicates in field names
         std::map<std::string, std::vector<const field*>> unique_fields;
 
         for(const field & field: fields) {
-            if (std::regex_match(field.name, sequence_id_pattern)) {
-                // Don't add foo_sequence_id field.
-                continue;
-            }
-
             unique_fields[field.name].push_back(&field);
 
             if(field.name == "id") {
diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp
index 96a187b9..0475dfbe 100644
--- a/src/collection_manager.cpp
+++ b/src/collection_manager.cpp
@@ -89,10 +89,6 @@ Collection* CollectionManager::init_collection(const nlohmann::json & collection
         }
 
         fields.push_back(f);
-
-        if (!f.reference.empty()) {
-            fields.emplace_back(field(f.name + "_sequence_id", "string", false, f.optional, true));
-        }
     }
 
     std::string default_sorting_field = collection_meta[Collection::COLLECTION_DEFAULT_SORTING_FIELD_KEY].get<std::string>();
diff --git a/test/collection_manager_test.cpp b/test/collection_manager_test.cpp
index cdbd482e..38c7e014 100644
--- a/test/collection_manager_test.cpp
+++ b/test/collection_manager_test.cpp
@@ -227,6 +227,17 @@ TEST_F(CollectionManagerTest, CollectionCreation) {
               "sort":false,
               "type":"string",
               "reference":"Products.product_id"
+            },
+            {
+              "facet":false,
+              "index":true,
+              "infix":false,
+              "locale":"",
+              "name":"product_id_sequence_id",
+              "nested":false,
+              "optional":true,
+              "sort":true,
+              "type":"int64"
             }
           ],
           "id":0,
@@ -464,9 +475,11 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
     ASSERT_EQ(0, collection1->get_collection_id());
     ASSERT_EQ(18, collection1->get_next_seq_id());
     ASSERT_EQ(facet_fields_expected, collection1->get_facet_fields());
-    ASSERT_EQ(2, collection1->get_sort_fields().size());
+    // product_id_sequence_id is also included
+    ASSERT_EQ(3, collection1->get_sort_fields().size());
     ASSERT_EQ("location", collection1->get_sort_fields()[0].name);
-    ASSERT_EQ("points", collection1->get_sort_fields()[1].name);
+    ASSERT_EQ("product_id_sequence_id", collection1->get_sort_fields()[1].name);
+    ASSERT_EQ("points", collection1->get_sort_fields()[2].name);
     ASSERT_EQ(schema.size(), collection1->get_schema().size());
     ASSERT_EQ("points", collection1->get_default_sorting_field());
 

From 72f896dc2e03242146561e65e01bde7a9c7e40f6 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Sun, 22 Jan 2023 12:02:29 +0530
Subject: [PATCH 11/51] Filter by reference.

---
 include/collection.h          |  2 ++
 src/collection.cpp            | 16 ++++++++++++++++
 test/collection_join_test.cpp |  3 +++
 3 files changed, 21 insertions(+)

diff --git a/include/collection.h b/include/collection.h
index 27bf7920..f3dbc66a 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -463,6 +463,8 @@ public:
 
     Option<bool> validate_reference_filter(const std::string& filter_query) const;
 
+    Option<bool> validate_reference_filter(const std::string& filter_query) const;
+
     Option<nlohmann::json> get(const std::string & id) const;
 
     Option<std::string> remove(const std::string & id, bool remove_from_store = true);
diff --git a/src/collection.cpp b/src/collection.cpp
index 3766a94d..c4a1066a 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -2581,6 +2581,22 @@ Option<bool> Collection::validate_reference_filter(const std::string& filter_que
     return Option<bool>(true);
 }
 
+Option<bool> Collection::validate_reference_filter(const std::string& filter_query) const {
+    std::shared_lock lock(mutex);
+
+    const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
+    filter_node_t* filter_tree_root = nullptr;
+    Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
+                                                        store, doc_id_prefix, filter_tree_root);
+
+    if(!filter_op.ok()) {
+        return filter_op;
+    }
+
+    delete filter_tree_root;
+    return Option<bool>(true);
+}
+
 bool Collection::facet_value_to_string(const facet &a_facet, const facet_count_t &facet_count,
                                        const nlohmann::json &document, std::string &value) const {
 
diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index 9db121aa..0cedd216 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -284,6 +284,9 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
     ASSERT_TRUE(add_doc_op.ok());
     ASSERT_EQ(customer_collection->get("0").get().count("reference_id_sequence_id"), 1);
 
+    // Referenced document should be accessible from Customers collection.
+    auto sequence_id = collectionManager.get_collection("Products")->get_seq_id_collection_prefix() + "_" +
+                                customer_collection->get("0").get()["product_id_sequence_id"].get<std::string>();
     nlohmann::json document;
     // Referenced document's sequence_id must be valid.
     auto get_op = collectionManager.get_collection("Products")->get_document_from_store(

From 1d3e05f2cbf6f9e342a118db2c4bd250471f34ea Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Tue, 24 Jan 2023 10:57:29 +0530
Subject: [PATCH 12/51] Optimize reference filtering.

---
 include/collection.h          |  4 +++-
 include/index.h               |  4 ++++
 src/collection.cpp            | 16 ----------------
 src/index.cpp                 |  2 +-
 test/collection_join_test.cpp |  3 ---
 5 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/include/collection.h b/include/collection.h
index f3dbc66a..14693473 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -463,7 +463,9 @@ public:
 
     Option<bool> validate_reference_filter(const std::string& filter_query) const;
 
-    Option<bool> validate_reference_filter(const std::string& filter_query) const;
+    Option<bool> get_reference_filter_ids(const std::string & filter_query,
+                                          const std::string & collection_name,
+                                          std::pair<uint32_t, uint32_t*>& reference_index_ids) const;
 
     Option<nlohmann::json> get(const std::string & id) const;
 
diff --git a/include/index.h b/include/index.h
index 0ce10daf..29e1292c 100644
--- a/include/index.h
+++ b/include/index.h
@@ -702,6 +702,10 @@ public:
                                                   const std::string& collection_name,
                                                   const std::string& reference_helper_field_name) const;
 
+    void do_reference_filtering_with_lock(std::pair<uint32_t, uint32_t*>& reference_index_ids,
+                                          filter_node_t const* const& filter_tree_root,
+                                          const std::string& reference_field_name) const;
+
     void refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields);
 
     // the following methods are not synchronized because their parent calls are synchronized or they are const/static
diff --git a/src/collection.cpp b/src/collection.cpp
index c4a1066a..3766a94d 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -2581,22 +2581,6 @@ Option<bool> Collection::validate_reference_filter(const std::string& filter_que
     return Option<bool>(true);
 }
 
-Option<bool> Collection::validate_reference_filter(const std::string& filter_query) const {
-    std::shared_lock lock(mutex);
-
-    const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
-    filter_node_t* filter_tree_root = nullptr;
-    Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
-                                                        store, doc_id_prefix, filter_tree_root);
-
-    if(!filter_op.ok()) {
-        return filter_op;
-    }
-
-    delete filter_tree_root;
-    return Option<bool>(true);
-}
-
 bool Collection::facet_value_to_string(const facet &a_facet, const facet_count_t &facet_count,
                                        const nlohmann::json &document, std::string &value) const {
 
diff --git a/src/index.cpp b/src/index.cpp
index f9ba9f35..840cebfd 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1497,7 +1497,7 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
                                  const uint32_t& context_ids_length,
                                  const uint32_t* context_ids) const {
     // auto begin = std::chrono::high_resolution_clock::now();
-    const filter a_filter = root->filter_exp;
+/**/    const filter a_filter = root->filter_exp;
 
     bool is_referenced_filter = !a_filter.referenced_collection_name.empty();
     if (is_referenced_filter) {
diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index 0cedd216..9db121aa 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -284,9 +284,6 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
     ASSERT_TRUE(add_doc_op.ok());
     ASSERT_EQ(customer_collection->get("0").get().count("reference_id_sequence_id"), 1);
 
-    // Referenced document should be accessible from Customers collection.
-    auto sequence_id = collectionManager.get_collection("Products")->get_seq_id_collection_prefix() + "_" +
-                                customer_collection->get("0").get()["product_id_sequence_id"].get<std::string>();
     nlohmann::json document;
     // Referenced document's sequence_id must be valid.
     auto get_op = collectionManager.get_collection("Products")->get_document_from_store(

From 753aa298881715ecd980050e462b3c685106ce42 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 27 Jan 2023 12:57:13 +0530
Subject: [PATCH 13/51] Add `Index::rearranging_recursive_filter`.

---
 include/index.h | 2 +-
 src/index.cpp   | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/index.h b/include/index.h
index 29e1292c..600d3c6f 100644
--- a/include/index.h
+++ b/include/index.h
@@ -703,7 +703,7 @@ public:
                                                   const std::string& reference_helper_field_name) const;
 
     void do_reference_filtering_with_lock(std::pair<uint32_t, uint32_t*>& reference_index_ids,
-                                          filter_node_t const* const& filter_tree_root,
+                                          filter_node_t* filter_tree_root,
                                           const std::string& reference_field_name) const;
 
     void refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields);
diff --git a/src/index.cpp b/src/index.cpp
index 840cebfd..cd307846 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1497,7 +1497,7 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
                                  const uint32_t& context_ids_length,
                                  const uint32_t* context_ids) const {
     // auto begin = std::chrono::high_resolution_clock::now();
-/**/    const filter a_filter = root->filter_exp;
+    const filter a_filter = root->filter_exp;
 
     bool is_referenced_filter = !a_filter.referenced_collection_name.empty();
     if (is_referenced_filter) {
@@ -1958,7 +1958,6 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
     if (root == nullptr) {
         return Option(true);
     }
-
     if (root->isOperator) {
         uint32_t l_filter_ids_length = 0;
         if (root->left != nullptr) {

From c4ef71f274dfbd14d502a7b44d2568be1e64f2e5 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 27 Jan 2023 19:58:06 +0530
Subject: [PATCH 14/51] Add `Index::adaptive_filter`.

---
 src/index.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/index.cpp b/src/index.cpp
index cd307846..f9ba9f35 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1958,6 +1958,7 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
     if (root == nullptr) {
         return Option(true);
     }
+
     if (root->isOperator) {
         uint32_t l_filter_ids_length = 0;
         if (root->left != nullptr) {

From f15f0822b234513a08ed13e0098188a2d9890d1e Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Mon, 30 Jan 2023 10:47:04 +0530
Subject: [PATCH 15/51] Refactor filtering logic.

---
 src/index.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/index.cpp b/src/index.cpp
index f9ba9f35..135d073c 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -2113,6 +2113,13 @@ void copy_reference_ids(filter_result_t& from, filter_result_t& to) {
             }
         }
     }
+
+    do_filtering(root);
+    filter_ids_length = root->match_index_ids.first;
+    filter_ids = root->match_index_ids.second;
+
+    // Prevents double deletion. We'll be deleting this array upstream and when the filter tree is destructed.
+    root->match_index_ids.second = nullptr;
 }
 
 Option<bool> Index::recursive_filter(filter_node_t* const root,

From a11a899a4c8ed0677a4b373eacc4149f8fe9ffd4 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 2 Feb 2023 11:23:09 +0530
Subject: [PATCH 16/51] Add `reference_fields` map in `Collection`.

---
 include/index.h               | 2 +-
 src/index.cpp                 | 7 -------
 test/collection_join_test.cpp | 3 ++-
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/include/index.h b/include/index.h
index 600d3c6f..8742300f 100644
--- a/include/index.h
+++ b/include/index.h
@@ -704,7 +704,7 @@ public:
 
     void do_reference_filtering_with_lock(std::pair<uint32_t, uint32_t*>& reference_index_ids,
                                           filter_node_t* filter_tree_root,
-                                          const std::string& reference_field_name) const;
+                                          const std::string& reference_helper_field_name) const;
 
     void refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields);
 
diff --git a/src/index.cpp b/src/index.cpp
index 135d073c..f9ba9f35 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -2113,13 +2113,6 @@ void copy_reference_ids(filter_result_t& from, filter_result_t& to) {
             }
         }
     }
-
-    do_filtering(root);
-    filter_ids_length = root->match_index_ids.first;
-    filter_ids = root->match_index_ids.second;
-
-    // Prevents double deletion. We'll be deleting this array upstream and when the filter tree is destructed.
-    root->match_index_ids.second = nullptr;
 }
 
 Option<bool> Index::recursive_filter(filter_node_t* const root,
diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index 9db121aa..b25439e6 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -265,6 +265,7 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
         }
         ASSERT_TRUE(add_op.ok());
     }
+
     collectionManager.drop_collection("Customers");
     customers_schema_json =
             R"({
@@ -708,4 +709,4 @@ TEST_F(CollectionJoinTest, IncludeFieldsByReference_SingleMatch) {
 //    // 3 fields in Products document and 2 fields from Customers document
 //    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
 //    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id_sequence_id"));
-}
\ No newline at end of file
+}

From 424c0f20da412fcf270553095badbd1e6a5c07b6 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 3 Feb 2023 14:30:17 +0530
Subject: [PATCH 17/51] Fix double locking of collection mutex.

---
 test/collection_join_test.cpp | 176 ++--------------------------------
 1 file changed, 8 insertions(+), 168 deletions(-)

diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index b25439e6..11de2367 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -405,11 +405,11 @@ TEST_F(CollectionJoinTest, FilterByReference_SingleMatch) {
     ASSERT_EQ(1, result["hits"].size());
     ASSERT_EQ("soap", result["hits"][0]["document"]["product_name"].get<std::string>());
 
-    collectionManager.drop_collection("Customers");
-    collectionManager.drop_collection("Products");
+//    collectionManager.drop_collection("Customers");
+//    collectionManager.drop_collection("Products");
 }
 
-TEST_F(CollectionJoinTest, FilterByReference_MultipleMatch) {
+TEST_F(CollectionJoinTest, FilterByReferenceField_MultipleMatch) {
     auto schema_json =
             R"({
                 "name": "Users",
@@ -535,7 +535,7 @@ TEST_F(CollectionJoinTest, FilterByReference_MultipleMatch) {
         ASSERT_TRUE(add_op.ok());
     }
 
-    auto coll = collectionManager.get_collection_unsafe("Users");
+    auto coll = collectionManager.get_collection("Users");
 
     // Search for users linked to repo_b
     auto result = coll->search("R", {"user_name"}, "$Links(repo_id:=repo_b)", {}, {}, {0},
@@ -546,167 +546,7 @@ TEST_F(CollectionJoinTest, FilterByReference_MultipleMatch) {
     ASSERT_EQ("user_b", result["hits"][0]["document"]["user_id"].get<std::string>());
     ASSERT_EQ("user_a", result["hits"][1]["document"]["user_id"].get<std::string>());
 
-    collectionManager.drop_collection("Users");
-    collectionManager.drop_collection("Repos");
-    collectionManager.drop_collection("Links");
-}
-
-TEST_F(CollectionJoinTest, IncludeFieldsByReference_SingleMatch) {
-    auto schema_json =
-            R"({
-                "name": "Products",
-                "fields": [
-                    {"name": "product_id", "type": "string"},
-                    {"name": "product_name", "type": "string"},
-                    {"name": "product_description", "type": "string"}
-                ]
-            })"_json;
-    std::vector<nlohmann::json> documents = {
-            R"({
-                "product_id": "product_a",
-                "product_name": "shampoo",
-                "product_description": "Our new moisturizing shampoo is perfect for those with dry or damaged hair."
-            })"_json,
-            R"({
-                "product_id": "product_b",
-                "product_name": "soap",
-                "product_description": "Introducing our all-natural, organic soap bar made with essential oils and botanical ingredients."
-            })"_json
-    };
-    auto collection_create_op = collectionManager.create_collection(schema_json);
-    ASSERT_TRUE(collection_create_op.ok());
-    for (auto const &json: documents) {
-        auto add_op = collection_create_op.get()->add(json.dump());
-        if (!add_op.ok()) {
-            LOG(INFO) << add_op.error();
-        }
-        ASSERT_TRUE(add_op.ok());
-    }
-
-    schema_json =
-            R"({
-                "name": "Customers",
-                "fields": [
-                    {"name": "customer_id", "type": "string"},
-                    {"name": "customer_name", "type": "string"},
-                    {"name": "product_price", "type": "float"},
-                    {"name": "product_id", "type": "string", "reference": "Products.product_id"}
-                ]
-            })"_json;
-    documents = {
-            R"({
-                "customer_id": "customer_a",
-                "customer_name": "Joe",
-                "product_price": 143,
-                "product_id": "product_a"
-            })"_json,
-            R"({
-                "customer_id": "customer_a",
-                "customer_name": "Joe",
-                "product_price": 73.5,
-                "product_id": "product_b"
-            })"_json,
-            R"({
-                "customer_id": "customer_b",
-                "customer_name": "Dan",
-                "product_price": 75,
-                "product_id": "product_a"
-            })"_json,
-            R"({
-                "customer_id": "customer_b",
-                "customer_name": "Dan",
-                "product_price": 140,
-                "product_id": "product_b"
-            })"_json
-    };
-    collection_create_op = collectionManager.create_collection(schema_json);
-    ASSERT_TRUE(collection_create_op.ok());
-    for (auto const &json: documents) {
-        auto add_op = collection_create_op.get()->add(json.dump());
-        if (!add_op.ok()) {
-            LOG(INFO) << add_op.error();
-        }
-        ASSERT_TRUE(add_op.ok());
-    }
-
-    std::map<std::string, std::string> req_params = {
-            {"collection", "Products"},
-            {"q", "s"},
-            {"query_by", "product_name"},
-            {"filter_by", "$Customers(customer_id:=customer_a && product_price:<100)"},
-    };
-
-    nlohmann::json embedded_params;
-    std::string json_res;
-    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
-            std::chrono::system_clock::now().time_since_epoch()).count();
-
-    req_params["include_fields"] = "$foo.bar";
-    auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
-    ASSERT_FALSE(search_op.ok());
-    ASSERT_EQ("Invalid reference in include_fields, expected `$CollectionName(fieldA, ...)`.", search_op.error());
-
-    req_params["include_fields"] = "$foo(bar";
-    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
-    ASSERT_FALSE(search_op.ok());
-    ASSERT_EQ("Invalid reference in include_fields, expected `$CollectionName(fieldA, ...)`.", search_op.error());
-
-//    req_params["include_fields"] = "$foo(bar)";
-//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
-//    ASSERT_FALSE(search_op.ok());
-//    ASSERT_EQ("Referenced collection `foo` not found.", search_op.error());
-//
-//    req_params["include_fields"] = "$Customers(bar)";
-//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
-//    ASSERT_TRUE(search_op.ok());
-//
-//    nlohmann::json res_obj = nlohmann::json::parse(json_res);
-//    ASSERT_EQ(1, res_obj["found"].get<size_t>());
-//    ASSERT_EQ(1, res_obj["hits"].size());
-//    ASSERT_EQ(0, res_obj["hits"][0]["document"].size());
-//
-//    req_params["include_fields"] = "$Customers(product_price)";
-//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
-//    ASSERT_TRUE(search_op.ok());
-//
-//    res_obj = nlohmann::json::parse(json_res);
-//    ASSERT_EQ(1, res_obj["found"].get<size_t>());
-//    ASSERT_EQ(1, res_obj["hits"].size());
-//    ASSERT_EQ(1, res_obj["hits"][0]["document"].size());
-//    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
-//    ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
-//
-//    req_params["include_fields"] = "$Customers(product_price, customer_id)";
-//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
-//    ASSERT_TRUE(search_op.ok());
-//
-//    res_obj = nlohmann::json::parse(json_res);
-//    ASSERT_EQ(1, res_obj["found"].get<size_t>());
-//    ASSERT_EQ(1, res_obj["hits"].size());
-//    ASSERT_EQ(2, res_obj["hits"][0]["document"].size());
-//    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
-//    ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
-//    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("customer_id"));
-//    ASSERT_EQ("customer_a", res_obj["hits"][0]["document"].at("customer_id"));
-//
-//    req_params["include_fields"] = "*, $Customers(product_price, customer_id)";
-//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
-//    ASSERT_TRUE(search_op.ok());
-//
-//    res_obj = nlohmann::json::parse(json_res);
-//    ASSERT_EQ(1, res_obj["found"].get<size_t>());
-//    ASSERT_EQ(1, res_obj["hits"].size());
-//    // 3 fields in Products document and 2 fields from Customers document
-//    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
-//
-//    req_params["include_fields"] = "*, $Customers(product*)";
-//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
-//    ASSERT_TRUE(search_op.ok());
-//
-//    res_obj = nlohmann::json::parse(json_res);
-//    ASSERT_EQ(1, res_obj["found"].get<size_t>());
-//    ASSERT_EQ(1, res_obj["hits"].size());
-//    // 3 fields in Products document and 2 fields from Customers document
-//    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
-//    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id_sequence_id"));
-}
+//    collectionManager.drop_collection("Users");
+//    collectionManager.drop_collection("Repos");
+//    collectionManager.drop_collection("Links");
+}
\ No newline at end of file

From c77e0373ce5d10d84062290575a0112350ce38cb Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Tue, 7 Feb 2023 10:53:18 +0530
Subject: [PATCH 18/51] Fix tests.

---
 test/collection_join_test.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index 11de2367..98e7663f 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -405,11 +405,11 @@ TEST_F(CollectionJoinTest, FilterByReference_SingleMatch) {
     ASSERT_EQ(1, result["hits"].size());
     ASSERT_EQ("soap", result["hits"][0]["document"]["product_name"].get<std::string>());
 
-//    collectionManager.drop_collection("Customers");
-//    collectionManager.drop_collection("Products");
+    collectionManager.drop_collection("Customers");
+    collectionManager.drop_collection("Products");
 }
 
-TEST_F(CollectionJoinTest, FilterByReferenceField_MultipleMatch) {
+TEST_F(CollectionJoinTest, FilterByReference_MultipleMatch) {
     auto schema_json =
             R"({
                 "name": "Users",
@@ -535,7 +535,7 @@ TEST_F(CollectionJoinTest, FilterByReferenceField_MultipleMatch) {
         ASSERT_TRUE(add_op.ok());
     }
 
-    auto coll = collectionManager.get_collection("Users");
+    auto coll = collectionManager.get_collection_unsafe("Users");
 
     // Search for users linked to repo_b
     auto result = coll->search("R", {"user_name"}, "$Links(repo_id:=repo_b)", {}, {}, {0},
@@ -546,7 +546,7 @@ TEST_F(CollectionJoinTest, FilterByReferenceField_MultipleMatch) {
     ASSERT_EQ("user_b", result["hits"][0]["document"]["user_id"].get<std::string>());
     ASSERT_EQ("user_a", result["hits"][1]["document"]["user_id"].get<std::string>());
 
-//    collectionManager.drop_collection("Users");
-//    collectionManager.drop_collection("Repos");
-//    collectionManager.drop_collection("Links");
+    collectionManager.drop_collection("Users");
+    collectionManager.drop_collection("Repos");
+    collectionManager.drop_collection("Links");
 }
\ No newline at end of file

From e7949e650ab2ce91c6da7529ca5fa0ed137e133b Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 9 Feb 2023 11:50:58 +0530
Subject: [PATCH 19/51] Reference `include_fields`.

---
 test/collection_join_test.cpp | 160 ++++++++++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)

diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index 98e7663f..f302d3dc 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -549,4 +549,164 @@ TEST_F(CollectionJoinTest, FilterByReference_MultipleMatch) {
     collectionManager.drop_collection("Users");
     collectionManager.drop_collection("Repos");
     collectionManager.drop_collection("Links");
+}
+
+TEST_F(CollectionJoinTest, IncludeFieldsByReference_SingleMatch) {
+    auto schema_json =
+            R"({
+                "name": "Products",
+                "fields": [
+                    {"name": "product_id", "type": "string"},
+                    {"name": "product_name", "type": "string"},
+                    {"name": "product_description", "type": "string"}
+                ]
+            })"_json;
+    std::vector<nlohmann::json> documents = {
+            R"({
+                "product_id": "product_a",
+                "product_name": "shampoo",
+                "product_description": "Our new moisturizing shampoo is perfect for those with dry or damaged hair."
+            })"_json,
+            R"({
+                "product_id": "product_b",
+                "product_name": "soap",
+                "product_description": "Introducing our all-natural, organic soap bar made with essential oils and botanical ingredients."
+            })"_json
+    };
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    schema_json =
+            R"({
+                "name": "Customers",
+                "fields": [
+                    {"name": "customer_id", "type": "string"},
+                    {"name": "customer_name", "type": "string"},
+                    {"name": "product_price", "type": "float"},
+                    {"name": "product_id", "type": "string", "reference": "Products.product_id"}
+                ]
+            })"_json;
+    documents = {
+            R"({
+                "customer_id": "customer_a",
+                "customer_name": "Joe",
+                "product_price": 143,
+                "product_id": "product_a"
+            })"_json,
+            R"({
+                "customer_id": "customer_a",
+                "customer_name": "Joe",
+                "product_price": 73.5,
+                "product_id": "product_b"
+            })"_json,
+            R"({
+                "customer_id": "customer_b",
+                "customer_name": "Dan",
+                "product_price": 75,
+                "product_id": "product_a"
+            })"_json,
+            R"({
+                "customer_id": "customer_b",
+                "customer_name": "Dan",
+                "product_price": 140,
+                "product_id": "product_b"
+            })"_json
+    };
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    std::map<std::string, std::string> req_params = {
+            {"collection", "Products"},
+            {"q", "s"},
+            {"query_by", "product_name"},
+            {"filter_by", "$Customers(customer_id:=customer_a && product_price:<100)"},
+    };
+
+    nlohmann::json embedded_params;
+    std::string json_res;
+    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::system_clock::now().time_since_epoch()).count();
+
+    req_params["include_fields"] = "$foo.bar";
+    auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_FALSE(search_op.ok());
+    ASSERT_EQ("Invalid reference in include_fields, expected `$CollectionName(fieldA, ...)`.", search_op.error());
+
+    req_params["include_fields"] = "$foo(bar";
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_FALSE(search_op.ok());
+    ASSERT_EQ("Invalid reference in include_fields, expected `$CollectionName(fieldA, ...)`.", search_op.error());
+
+//    req_params["include_fields"] = "$foo(bar)";
+//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+//    ASSERT_FALSE(search_op.ok());
+//    ASSERT_EQ("Referenced collection `foo` not found.", search_op.error());
+//
+//    req_params["include_fields"] = "$Customers(bar)";
+//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+//    ASSERT_TRUE(search_op.ok());
+//
+//    nlohmann::json res_obj = nlohmann::json::parse(json_res);
+//    ASSERT_EQ(1, res_obj["found"].get<size_t>());
+//    ASSERT_EQ(1, res_obj["hits"].size());
+//    ASSERT_EQ(0, res_obj["hits"][0]["document"].size());
+//
+//    req_params["include_fields"] = "$Customers(product_price)";
+//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+//    ASSERT_TRUE(search_op.ok());
+//
+//    res_obj = nlohmann::json::parse(json_res);
+//    ASSERT_EQ(1, res_obj["found"].get<size_t>());
+//    ASSERT_EQ(1, res_obj["hits"].size());
+//    ASSERT_EQ(1, res_obj["hits"][0]["document"].size());
+//    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
+//    ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
+//
+//    req_params["include_fields"] = "$Customers(product_price, customer_id)";
+//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+//    ASSERT_TRUE(search_op.ok());
+//
+//    res_obj = nlohmann::json::parse(json_res);
+//    ASSERT_EQ(1, res_obj["found"].get<size_t>());
+//    ASSERT_EQ(1, res_obj["hits"].size());
+//    ASSERT_EQ(2, res_obj["hits"][0]["document"].size());
+//    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
+//    ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
+//    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("customer_id"));
+//    ASSERT_EQ("customer_a", res_obj["hits"][0]["document"].at("customer_id"));
+//
+//    req_params["include_fields"] = "*, $Customers(product_price, customer_id)";
+//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+//    ASSERT_TRUE(search_op.ok());
+//
+//    res_obj = nlohmann::json::parse(json_res);
+//    ASSERT_EQ(1, res_obj["found"].get<size_t>());
+//    ASSERT_EQ(1, res_obj["hits"].size());
+//    // 3 fields in Products document and 2 fields from Customers document
+//    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
+//
+//    req_params["include_fields"] = "*, $Customers(product*)";
+//    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+//    ASSERT_TRUE(search_op.ok());
+//
+//    res_obj = nlohmann::json::parse(json_res);
+//    ASSERT_EQ(1, res_obj["found"].get<size_t>());
+//    ASSERT_EQ(1, res_obj["hits"].size());
+//    // 3 fields in Products document and 2 fields from Customers document
+//    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
+//    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id_sequence_id"));
 }
\ No newline at end of file

From 44668ebe27cee5d2ab9525b7cec8d34a6fb2f000 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 9 Feb 2023 12:25:16 +0530
Subject: [PATCH 20/51] fix memory leak.

---
 src/collection.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/collection.cpp b/src/collection.cpp
index 3766a94d..5f3041a6 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -3938,6 +3938,8 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
             reference_docs.push_back(ref_doc);
         }
 
+        delete[] documents[0].second;
+
         for (const auto &ref_doc: reference_docs) {
             doc.update(ref_doc);
         }

From bb4c0af996509eb6d2c7118041bcb30468d44745 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Wed, 15 Feb 2023 16:48:44 +0530
Subject: [PATCH 21/51] Fix rebase error.

---
 include/collection.h | 8 ++------
 include/index.h      | 6 +++---
 src/collection.cpp   | 3 ---
 src/index.cpp        | 1 -
 4 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/include/collection.h b/include/collection.h
index 14693473..8d77cfde 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -457,16 +457,12 @@ public:
 
     Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;
 
-    Option<bool> get_reference_filter_ids(const std::string & filter_query,
+    Option<bool> get_reference_filter_ids(const std::string& filter_query,
                                           filter_result_t& filter_result,
-                                          const std::string & collection_name) const;
+                                          const std::string& collection_name) const;
 
     Option<bool> validate_reference_filter(const std::string& filter_query) const;
 
-    Option<bool> get_reference_filter_ids(const std::string & filter_query,
-                                          const std::string & collection_name,
-                                          std::pair<uint32_t, uint32_t*>& reference_index_ids) const;
-
     Option<nlohmann::json> get(const std::string & id) const;
 
     Option<std::string> remove(const std::string & id, bool remove_from_store = true);
diff --git a/include/index.h b/include/index.h
index 8742300f..3344b2ed 100644
--- a/include/index.h
+++ b/include/index.h
@@ -702,9 +702,9 @@ public:
                                                   const std::string& collection_name,
                                                   const std::string& reference_helper_field_name) const;
 
-    void do_reference_filtering_with_lock(std::pair<uint32_t, uint32_t*>& reference_index_ids,
-                                          filter_node_t* filter_tree_root,
-                                          const std::string& reference_helper_field_name) const;
+    Option<bool> do_reference_filtering_with_lock(filter_node_t* const filter_tree_root,
+                                                  filter_result_t& filter_result,
+                                                  const std::string & reference_helper_field_name) const;
 
     void refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields);
 
diff --git a/src/collection.cpp b/src/collection.cpp
index 5f3041a6..8ced0cae 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -2572,7 +2572,6 @@ Option<bool> Collection::validate_reference_filter(const std::string& filter_que
     filter_node_t* filter_tree_root = nullptr;
     Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
                                                         store, doc_id_prefix, filter_tree_root);
-
     if(!filter_op.ok()) {
         return filter_op;
     }
@@ -3938,8 +3937,6 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
             reference_docs.push_back(ref_doc);
         }
 
-        delete[] documents[0].second;
-
         for (const auto &ref_doc: reference_docs) {
             doc.update(ref_doc);
         }
diff --git a/src/index.cpp b/src/index.cpp
index f9ba9f35..e88badc0 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -2719,7 +2719,6 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
                    const vector_query_t& vector_query,
                    size_t facet_sample_percent, size_t facet_sample_threshold,
                    const std::string& collection_name) const {
-
     std::shared_lock lock(mutex);
 
     filter_result_t filter_result;

From baff5aa9511b8c65e79aeb409091baee54ac4748 Mon Sep 17 00:00:00 2001
From: ozanarmagan <ozan-armagan@yandex.com>
Date: Sun, 5 Feb 2023 15:18:35 +0300
Subject: [PATCH 22/51] Added ONNX Runtime dependency

---
 .gitignore                              |   1 -
 CMakeLists.txt                          |   2 +-
 WORKSPACE                               |   5 +-
 bazel/foreign_cc.patch                  |   4 +-
 bazel/foreign_cc_version_compiler.patch | 283 ++++++++++++++++++++++++
 5 files changed, 290 insertions(+), 5 deletions(-)
 create mode 100644 bazel/foreign_cc_version_compiler.patch

diff --git a/.gitignore b/.gitignore
index 264c40e2..67960068 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,4 +15,3 @@ typesense-server-data/
 .clwb/.bazelproject
 .vscode/settings.json
 /onnxruntime-prefix
-
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9893882b..edcd6179 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -255,4 +255,4 @@ target_sources(search PRIVATE ${ONNX_EXT_SRC_FILES})
 add_dependencies(typesense-server onnxruntime_ext)
 add_dependencies(typesense-test onnxruntime_ext)
 add_dependencies(benchmark onnxruntime_ext)
-add_dependencies(search onnxruntime_ext)
\ No newline at end of file
+add_dependencies(search onnxruntime_ext)
diff --git a/WORKSPACE b/WORKSPACE
index 1dc1401d..94d24423 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -13,7 +13,10 @@ bazel_compdb_deps()
 
 http_archive(
     name = "rules_foreign_cc",
-    patches = ["//bazel:foreign_cc.patch"],
+    patches = ["//bazel:foreign_cc.patch", "//bazel:foreign_cc_version_compiler.patch"],
+    patch_args = [
+        "-p1",
+    ],
     sha256 = "2a4d07cd64b0719b39a7c12218a3e507672b82a97b98c6a89d38565894cf7c51",
     strip_prefix = "rules_foreign_cc-0.9.0",
     url = "https://github.com/bazelbuild/rules_foreign_cc/archive/refs/tags/0.9.0.tar.gz",
diff --git a/bazel/foreign_cc.patch b/bazel/foreign_cc.patch
index 9af0c8a7..9cb52c7c 100644
--- a/bazel/foreign_cc.patch
+++ b/bazel/foreign_cc.patch
@@ -1,5 +1,5 @@
---- foreign_cc/private/configure_script.bzl
-+++ foreign_cc/private/configure_script.bzl
+--- a/foreign_cc/private/configure_script.bzl
++++ b/foreign_cc/private/configure_script.bzl
 @@ -70,7 +70,7 @@
          ).lstrip())
  
diff --git a/bazel/foreign_cc_version_compiler.patch b/bazel/foreign_cc_version_compiler.patch
new file mode 100644
index 00000000..d668a1db
--- /dev/null
+++ b/bazel/foreign_cc_version_compiler.patch
@@ -0,0 +1,283 @@
+diff --git a/foreign_cc/private/cc_toolchain_util.bzl b/foreign_cc/private/cc_toolchain_util.bzl
+index fd7fa4d..188dc5f 100644
+--- a/foreign_cc/private/cc_toolchain_util.bzl
++++ b/foreign_cc/private/cc_toolchain_util.bzl
+@@ -265,15 +265,24 @@ def get_tools_info(ctx):
+         cc_toolchain = cc_toolchain,
+     )
+
++    cxx = cc_common.get_tool_for_action(
++            feature_configuration = feature_configuration,
++            action_name = ACTION_NAMES.cpp_compile,
++        )
++    cxx_splitted = cxx.split("/")
++    if(cxx_splitted[-1] == "gcc"):
++        cxx_splitted[-1] = "g++"
++        cxx = "/".join(cxx_splitted)
++    if(cxx_splitted[-1] == "clang"):
++        cxx_splitted[-1] = "clang++"  # swap only the basename; rebinding the list to a str broke the join below
++        cxx = "/".join(cxx_splitted)
++
+     return CxxToolsInfo(
+         cc = cc_common.get_tool_for_action(
+             feature_configuration = feature_configuration,
+             action_name = ACTION_NAMES.c_compile,
+         ),
+-        cxx = cc_common.get_tool_for_action(
+-            feature_configuration = feature_configuration,
+-            action_name = ACTION_NAMES.cpp_compile,
+-        ),
++        cxx = cxx,
+         cxx_linker_static = cc_common.get_tool_for_action(
+             feature_configuration = feature_configuration,
+             action_name = ACTION_NAMES.cpp_link_static_library,
+diff --git a/toolchains/built_toolchains.bzl b/toolchains/built_toolchains.bzl
+index 5e59e79..ddf63a5 100644
+--- a/toolchains/built_toolchains.bzl
++++ b/toolchains/built_toolchains.bzl
+@@ -28,6 +28,7 @@ _CMAKE_SRCS = {
+     "3.22.4": [["https://github.com/Kitware/CMake/releases/download/v3.22.4/cmake-3.22.4.tar.gz"], "cmake-3.22.4", "5c55d0b0bc4c191549e3502b8f99a4fe892077611df22b4178cc020626e22a47"],
+     "3.23.1": [["https://github.com/Kitware/CMake/releases/download/v3.23.1/cmake-3.23.1.tar.gz"], "cmake-3.23.1", "33fd10a8ec687a4d0d5b42473f10459bb92b3ae7def2b745dc10b192760869f3"],
+     "3.23.2": [["https://github.com/Kitware/CMake/releases/download/v3.23.2/cmake-3.23.2.tar.gz"], "cmake-3.23.2", "f316b40053466f9a416adf981efda41b160ca859e97f6a484b447ea299ff26aa"],
++    "3.25.0": [["https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0.tar.gz"], "cmake-3.25.0", "306463f541555da0942e6f5a0736560f70c487178b9d94a5ae7f34d0538cdd48"],
+ }
+
+ # buildifier: disable=unnamed-macro
+@@ -438,6 +439,18 @@ def _ninja_toolchain(version, register_toolchains):
+         native.register_toolchains(
+             "@rules_foreign_cc//toolchains:built_ninja_toolchain",
+         )
++    if version == "1.11.1":
++        maybe(
++            http_archive,
++            name = "ninja_build_src",
++            build_file_content = _ALL_CONTENT,
++            sha256 = "31747ae633213f1eda3842686f83c2aa1412e0f5691d1c14dbbcc67fe7400cea",
++            strip_prefix = "ninja-1.11.1",
++            urls = [
++                "https://github.com/ninja-build/ninja/archive/v1.11.1.tar.gz",
++            ],
++        )
++        return
+     if version == "1.11.0":
+         maybe(
+             http_archive,
+diff --git a/toolchains/prebuilt_toolchains.bzl b/toolchains/prebuilt_toolchains.bzl
+index dabfb95..d9c38b4 100644
+--- a/toolchains/prebuilt_toolchains.bzl
++++ b/toolchains/prebuilt_toolchains.bzl
+@@ -67,6 +67,115 @@ def prebuilt_toolchains(cmake_version, ninja_version, register_toolchains):
+     _make_toolchains(register_toolchains)
+
+ def _cmake_toolchains(version, register_toolchains):
++    if "3.25.0" == version:
++        maybe(
++            http_archive,
++            name = "cmake-3.25.0-linux-aarch64",
++            urls = [
++                "https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0-linux-aarch64.tar.gz",
++            ],
++            sha256 = "27da36d6debe9b30f5c498554ae40cd621a55736f5f2ae2618ed95722a59965a",
++            strip_prefix = "cmake-3.25.0-linux-aarch64",
++            build_file_content = _CMAKE_BUILD_FILE.format(
++                bin = "cmake",
++                env = "{}",
++            ),
++        )
++
++        maybe(
++            http_archive,
++            name = "cmake-3.25.0-linux-x86_64",
++            urls = [
++                "https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0-linux-x86_64.tar.gz",
++            ],
++            sha256 = "ac634d6f0a81d7089adc7be5acff66a6bee3b08615f9a947858ce92a9ef59c8b",
++            strip_prefix = "cmake-3.25.0-linux-x86_64",
++            build_file_content = _CMAKE_BUILD_FILE.format(
++                bin = "cmake",
++                env = "{}",
++            ),
++        )
++
++        maybe(
++            http_archive,
++            name = "cmake-3.25.0-macos-universal",
++            urls = [
++                "https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0-macos-universal.tar.gz",
++            ],
++            sha256 = "c088e761534a2078cd9d0581d39f02d3f9ed05302e33135b55c6d619b263b4c3",
++            strip_prefix = "cmake-3.25.0-macos-universal/CMake.app/Contents",
++            build_file_content = _CMAKE_BUILD_FILE.format(
++                bin = "cmake",
++                env = "{}",
++            ),
++        )
++
++        maybe(
++            http_archive,
++            name = "cmake-3.25.0-windows-i386",
++            urls = [
++                "https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0-windows-i386.zip",
++            ],
++            sha256 = "ddd115257a19ff3dd18fc63f32a00ae742f8b62d2e39bc354629903512f99783",
++            strip_prefix = "cmake-3.25.0-windows-i386",
++            build_file_content = _CMAKE_BUILD_FILE.format(
++                bin = "cmake.exe",
++                env = "{}",
++            ),
++        )
++
++        maybe(
++            http_archive,
++            name = "cmake-3.25.0-windows-x86_64",
++            urls = [
++                "https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0-windows-x86_64.zip",
++            ],
++            sha256 = "b46030c10cab1170355952f9ac59f7e6dabc248070fc53f15dff11d4ed2910f8",
++            strip_prefix = "cmake-3.25.0-windows-x86_64",
++            build_file_content = _CMAKE_BUILD_FILE.format(
++                bin = "cmake.exe",
++                env = "{}",
++            ),
++        )
++
++        # buildifier: leave-alone
++        maybe(
++            prebuilt_toolchains_repository,
++            name = "cmake_3.25.0_toolchains",
++            repos = {
++                "cmake-3.25.0-linux-aarch64": [
++                    "@platforms//cpu:aarch64",
++                    "@platforms//os:linux",
++                ],
++                "cmake-3.25.0-linux-x86_64": [
++                    "@platforms//cpu:x86_64",
++                    "@platforms//os:linux",
++                ],
++                "cmake-3.25.0-macos-universal": [
++                    "@platforms//os:macos",
++                ],
++                "cmake-3.25.0-windows-i386": [
++                    "@platforms//cpu:x86_32",
++                    "@platforms//os:windows",
++                ],
++                "cmake-3.25.0-windows-x86_64": [
++                    "@platforms//cpu:x86_64",
++                    "@platforms//os:windows",
++                ],
++            },
++            tool = "cmake",
++        )
++
++        if register_toolchains:
++            native.register_toolchains(
++                "@cmake_3.25.0_toolchains//:cmake-3.25.0-linux-aarch64_toolchain",
++                "@cmake_3.25.0_toolchains//:cmake-3.25.0-linux-x86_64_toolchain",
++                "@cmake_3.25.0_toolchains//:cmake-3.25.0-macos-universal_toolchain",
++                "@cmake_3.25.0_toolchains//:cmake-3.25.0-windows-i386_toolchain",
++                "@cmake_3.25.0_toolchains//:cmake-3.25.0-windows-x86_64_toolchain",
++            )
++
++        return
+     if "3.23.2" == version:
+         maybe(
+             http_archive,
+@@ -4196,6 +4305,78 @@ def _cmake_toolchains(version, register_toolchains):
+     fail("Unsupported version: " + str(version))
+
+ def _ninja_toolchains(version, register_toolchains):
++    if "1.11.1" == version:
++        maybe(
++            http_archive,
++            name = "ninja_1.11.1_linux",
++            urls = [
++                "https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-linux.zip",
++            ],
++            sha256 = "b901ba96e486dce377f9a070ed4ef3f79deb45f4ffe2938f8e7ddc69cfb3df77",
++            strip_prefix = "",
++            build_file_content = _NINJA_BUILD_FILE.format(
++                bin = "ninja",
++                env = "{\"NINJA\": \"$(execpath :ninja_bin)\"}",
++            ),
++        )
++
++        maybe(
++            http_archive,
++            name = "ninja_1.11.1_mac",
++            urls = [
++                "https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-mac.zip",
++            ],
++            sha256 = "482ecb23c59ae3d4f158029112de172dd96bb0e97549c4b1ca32d8fad11f873e",
++            strip_prefix = "",
++            build_file_content = _NINJA_BUILD_FILE.format(
++                bin = "ninja",
++                env = "{\"NINJA\": \"$(execpath :ninja_bin)\"}",
++            ),
++        )
++
++        maybe(
++            http_archive,
++            name = "ninja_1.11.1_win",
++            urls = [
++                "https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-win.zip",
++            ],
++            sha256 = "524b344a1a9a55005eaf868d991e090ab8ce07fa109f1820d40e74642e289abc",
++            strip_prefix = "",
++            build_file_content = _NINJA_BUILD_FILE.format(
++                bin = "ninja.exe",
++                env = "{\"NINJA\": \"$(execpath :ninja_bin)\"}",
++            ),
++        )
++
++        # buildifier: leave-alone
++        maybe(
++            prebuilt_toolchains_repository,
++            name = "ninja_1.11.1_toolchains",
++            repos = {
++                "ninja_1.11.1_linux": [
++                    "@platforms//cpu:x86_64",
++                    "@platforms//os:linux",
++                ],
++                "ninja_1.11.1_mac": [
++                    "@platforms//cpu:x86_64",
++                    "@platforms//os:macos",
++                ],
++                "ninja_1.11.1_win": [
++                    "@platforms//cpu:x86_64",
++                    "@platforms//os:windows",
++                ],
++            },
++            tool = "ninja",
++        )
++
++        if register_toolchains:
++            native.register_toolchains(
++                "@ninja_1.11.1_toolchains//:ninja_1.11.1_linux_toolchain",
++                "@ninja_1.11.1_toolchains//:ninja_1.11.1_mac_toolchain",
++                "@ninja_1.11.1_toolchains//:ninja_1.11.1_win_toolchain",
++            )
++
++        return
+     if "1.11.0" == version:
+         maybe(
+             http_archive,
+diff --git a/toolchains/prebuilt_toolchains.py b/toolchains/prebuilt_toolchains.py
+index 5288b27..a193021 100755
+--- a/toolchains/prebuilt_toolchains.py
++++ b/toolchains/prebuilt_toolchains.py
+@@ -10,6 +10,7 @@ CMAKE_SHA256_URL_TEMPLATE = "https://cmake.org/files/v{minor}/cmake-{full}-SHA-2
+ CMAKE_URL_TEMPLATE = "https://github.com/Kitware/CMake/releases/download/v{full}/{file}"
+
+ CMAKE_VERSIONS = [
++    "3.25.0",
+     "3.23.2",
+     "3.23.1",
+     "3.22.4",
+@@ -116,6 +117,7 @@ NINJA_TARGETS = {
+ }
+
+ NINJA_VERSIONS = (
++    "1.11.1",
+     "1.10.2",
+     "1.10.1",
+     "1.10.0",

From 36c76e364893a493196903e23c9e48d661abe62a Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Tue, 17 Jan 2023 14:08:39 +0530
Subject: [PATCH 23/51] Abstract `foo_sequence_id` field from user.

---
 include/field.h            | 7 +++++++
 src/collection_manager.cpp | 4 ++++
 2 files changed, 11 insertions(+)

diff --git a/include/field.h b/include/field.h
index 18a1d4b7..44de4637 100644
--- a/include/field.h
+++ b/include/field.h
@@ -11,6 +11,7 @@
 #include <tsl/htrie_map.h>
 #include "json.hpp"
 #include "text_embedder_manager.h"
+#include <regex>
 
 namespace field_types {
     // first field value indexed will determine the type
@@ -284,11 +285,17 @@ struct field {
                                               const std::string & default_sorting_field,
                                               nlohmann::json& fields_json) {
         bool found_default_sorting_field = false;
+        const std::regex sequence_id_pattern(".*_sequence_id$");
 
         // Check for duplicates in field names
         std::map<std::string, std::vector<const field*>> unique_fields;
 
         for(const field & field: fields) {
+            if (std::regex_match(field.name, sequence_id_pattern)) {
+                // Don't add foo_sequence_id field.
+                continue;
+            }
+
             unique_fields[field.name].push_back(&field);
 
             if(field.name == "id") {
diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp
index 0475dfbe..96a187b9 100644
--- a/src/collection_manager.cpp
+++ b/src/collection_manager.cpp
@@ -89,6 +89,10 @@ Collection* CollectionManager::init_collection(const nlohmann::json & collection
         }
 
         fields.push_back(f);
+
+        if (!f.reference.empty()) {
+            fields.emplace_back(field(f.name + "_sequence_id", "string", false, f.optional, true));
+        }
     }
 
     std::string default_sorting_field = collection_meta[Collection::COLLECTION_DEFAULT_SORTING_FIELD_KEY].get<std::string>();

From 2670638648618ff4f526eef8646d9e590b8bdc28 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 19 Jan 2023 11:25:43 +0530
Subject: [PATCH 24/51] Serialize sequence id.

---
 test/collection_join_test.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index f302d3dc..9db121aa 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -265,7 +265,6 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
         }
         ASSERT_TRUE(add_op.ok());
     }
-
     collectionManager.drop_collection("Customers");
     customers_schema_json =
             R"({

From beb5e700cb68285b8c08faced93cfb2668a4cfe8 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 19 Jan 2023 11:27:52 +0530
Subject: [PATCH 25/51] Store `foo_sequence_id` in collection's meta-data.

---
 include/field.h            | 6 ------
 src/collection_manager.cpp | 4 ----
 2 files changed, 10 deletions(-)

diff --git a/include/field.h b/include/field.h
index 44de4637..63feff0f 100644
--- a/include/field.h
+++ b/include/field.h
@@ -285,17 +285,11 @@ struct field {
                                               const std::string & default_sorting_field,
                                               nlohmann::json& fields_json) {
         bool found_default_sorting_field = false;
-        const std::regex sequence_id_pattern(".*_sequence_id$");
 
         // Check for duplicates in field names
         std::map<std::string, std::vector<const field*>> unique_fields;
 
         for(const field & field: fields) {
-            if (std::regex_match(field.name, sequence_id_pattern)) {
-                // Don't add foo_sequence_id field.
-                continue;
-            }
-
             unique_fields[field.name].push_back(&field);
 
             if(field.name == "id") {
diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp
index 96a187b9..0475dfbe 100644
--- a/src/collection_manager.cpp
+++ b/src/collection_manager.cpp
@@ -89,10 +89,6 @@ Collection* CollectionManager::init_collection(const nlohmann::json & collection
         }
 
         fields.push_back(f);
-
-        if (!f.reference.empty()) {
-            fields.emplace_back(field(f.name + "_sequence_id", "string", false, f.optional, true));
-        }
     }
 
     std::string default_sorting_field = collection_meta[Collection::COLLECTION_DEFAULT_SORTING_FIELD_KEY].get<std::string>();

From 1fbfa34672b44ea3134e881925bfcd3f185d922b Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Sun, 22 Jan 2023 12:02:29 +0530
Subject: [PATCH 26/51] Filter by reference.

---
 include/collection.h          |  2 ++
 src/collection.cpp            | 16 ++++++++++++++++
 test/collection_join_test.cpp |  3 +++
 3 files changed, 21 insertions(+)

diff --git a/include/collection.h b/include/collection.h
index 8d77cfde..55855a28 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -463,6 +463,8 @@ public:
 
     Option<bool> validate_reference_filter(const std::string& filter_query) const;
 
+    Option<bool> validate_reference_filter(const std::string& filter_query) const;
+
     Option<nlohmann::json> get(const std::string & id) const;
 
     Option<std::string> remove(const std::string & id, bool remove_from_store = true);
diff --git a/src/collection.cpp b/src/collection.cpp
index 8ced0cae..b1c24216 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -2580,6 +2580,22 @@ Option<bool> Collection::validate_reference_filter(const std::string& filter_que
     return Option<bool>(true);
 }
 
+Option<bool> Collection::validate_reference_filter(const std::string& filter_query) const {
+    std::shared_lock lock(mutex);
+
+    const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
+    filter_node_t* filter_tree_root = nullptr;
+    Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
+                                                        store, doc_id_prefix, filter_tree_root);
+
+    if(!filter_op.ok()) {
+        return filter_op;
+    }
+
+    delete filter_tree_root;
+    return Option<bool>(true);
+}
+
 bool Collection::facet_value_to_string(const facet &a_facet, const facet_count_t &facet_count,
                                        const nlohmann::json &document, std::string &value) const {
 
diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index 9db121aa..0cedd216 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -284,6 +284,9 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
     ASSERT_TRUE(add_doc_op.ok());
     ASSERT_EQ(customer_collection->get("0").get().count("reference_id_sequence_id"), 1);
 
+    // Referenced document should be accessible from Customers collection.
+    auto sequence_id = collectionManager.get_collection("Products")->get_seq_id_collection_prefix() + "_" +
+                                customer_collection->get("0").get()["product_id_sequence_id"].get<std::string>();
     nlohmann::json document;
     // Referenced document's sequence_id must be valid.
     auto get_op = collectionManager.get_collection("Products")->get_document_from_store(

From 076a04c06218af665a1be549b12551f94f7720e9 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Tue, 24 Jan 2023 10:57:29 +0530
Subject: [PATCH 27/51] Optimize reference filtering.

---
 include/collection.h          |  4 ++++
 include/index.h               |  4 ++++
 src/collection.cpp            | 35 +++++++++++++++++++++++++++++++++++
 src/index.cpp                 |  2 +-
 test/collection_join_test.cpp |  3 ---
 5 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/include/collection.h b/include/collection.h
index 55855a28..38e91d1f 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -463,6 +463,10 @@ public:
 
     Option<bool> validate_reference_filter(const std::string& filter_query) const;
 
+    Option<bool> get_reference_filter_ids(const std::string & filter_query,
+                                          const std::string & collection_name,
+                                          std::pair<uint32_t, uint32_t*>& reference_index_ids) const;
+
     Option<bool> validate_reference_filter(const std::string& filter_query) const;
 
     Option<nlohmann::json> get(const std::string & id) const;
diff --git a/include/index.h b/include/index.h
index 3344b2ed..b427763a 100644
--- a/include/index.h
+++ b/include/index.h
@@ -706,6 +706,10 @@ public:
                                                   filter_result_t& filter_result,
                                                   const std::string & reference_helper_field_name) const;
 
+    void do_reference_filtering_with_lock(std::pair<uint32_t, uint32_t*>& reference_index_ids,
+                                          filter_node_t const* const& filter_tree_root,
+                                          const std::string& reference_field_name) const;
+
     void refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields);
 
     // the following methods are not synchronized because their parent calls are synchronized or they are const/static
diff --git a/src/collection.cpp b/src/collection.cpp
index b1c24216..3a740723 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -2580,6 +2580,41 @@ Option<bool> Collection::validate_reference_filter(const std::string& filter_que
     return Option<bool>(true);
 }
 
+Option<bool> Collection::get_reference_filter_ids(const std::string & filter_query,
+                                                  const std::string & collection_name,
+                                                  std::pair<uint32_t, uint32_t*>& reference_index_ids) const {
+    std::shared_lock lock(mutex);
+
+    std::string reference_field_name;
+    for (auto const& field: fields) {
+        if (!field.reference.empty() &&
+            field.reference.find(collection_name) == 0 &&
+            field.reference.find('.') == collection_name.size()) {
+            reference_field_name = field.name;
+            break;
+        }
+    }
+
+    if (reference_field_name.empty()) {
+        return Option<bool>(400, "Could not find any field in `" + name + "` referencing the collection `"
+                                                    + collection_name + "`.");
+    }
+
+    const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
+    filter_node_t* filter_tree_root = nullptr;
+    Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
+                                                        store, doc_id_prefix, filter_tree_root);
+    if(!filter_op.ok()) {
+        return filter_op;
+    }
+
+    reference_field_name += "_sequence_id";
+    index->do_reference_filtering_with_lock(reference_index_ids, filter_tree_root, reference_field_name);
+
+    delete filter_tree_root;
+    return Option<bool>(true);
+}
+
 Option<bool> Collection::validate_reference_filter(const std::string& filter_query) const {
     std::shared_lock lock(mutex);
 
diff --git a/src/index.cpp b/src/index.cpp
index e88badc0..3ec45ac4 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1497,7 +1497,7 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
                                  const uint32_t& context_ids_length,
                                  const uint32_t* context_ids) const {
     // auto begin = std::chrono::high_resolution_clock::now();
-    const filter a_filter = root->filter_exp;
+/**/    const filter a_filter = root->filter_exp;
 
     bool is_referenced_filter = !a_filter.referenced_collection_name.empty();
     if (is_referenced_filter) {
diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index 0cedd216..9db121aa 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -284,9 +284,6 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
     ASSERT_TRUE(add_doc_op.ok());
     ASSERT_EQ(customer_collection->get("0").get().count("reference_id_sequence_id"), 1);
 
-    // Referenced document should be accessible from Customers collection.
-    auto sequence_id = collectionManager.get_collection("Products")->get_seq_id_collection_prefix() + "_" +
-                                customer_collection->get("0").get()["product_id_sequence_id"].get<std::string>();
     nlohmann::json document;
     // Referenced document's sequence_id must be valid.
     auto get_op = collectionManager.get_collection("Products")->get_document_from_store(

From eacd644d3aede179cf1cc128c4bc367a9ce7a64c Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 27 Jan 2023 12:57:13 +0530
Subject: [PATCH 28/51] Add `Index::rearranging_recursive_filter`.

---
 include/index.h | 2 +-
 src/index.cpp   | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/index.h b/include/index.h
index b427763a..fdc059d7 100644
--- a/include/index.h
+++ b/include/index.h
@@ -707,7 +707,7 @@ public:
                                                   const std::string & reference_helper_field_name) const;
 
     void do_reference_filtering_with_lock(std::pair<uint32_t, uint32_t*>& reference_index_ids,
-                                          filter_node_t const* const& filter_tree_root,
+                                          filter_node_t* filter_tree_root,
                                           const std::string& reference_field_name) const;
 
     void refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields);
diff --git a/src/index.cpp b/src/index.cpp
index 3ec45ac4..6592b87b 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1497,7 +1497,7 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
                                  const uint32_t& context_ids_length,
                                  const uint32_t* context_ids) const {
     // auto begin = std::chrono::high_resolution_clock::now();
-/**/    const filter a_filter = root->filter_exp;
+    const filter a_filter = root->filter_exp;
 
     bool is_referenced_filter = !a_filter.referenced_collection_name.empty();
     if (is_referenced_filter) {
@@ -1958,7 +1958,6 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
     if (root == nullptr) {
         return Option(true);
     }
-
     if (root->isOperator) {
         uint32_t l_filter_ids_length = 0;
         if (root->left != nullptr) {

From d0069fe2d3d4c99d048a480b2c9dce31633a57fd Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 27 Jan 2023 19:58:06 +0530
Subject: [PATCH 29/51] Add `Index::adaptive_filter`.

---
 src/index.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/index.cpp b/src/index.cpp
index 6592b87b..e88badc0 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1958,6 +1958,7 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
     if (root == nullptr) {
         return Option(true);
     }
+
     if (root->isOperator) {
         uint32_t l_filter_ids_length = 0;
         if (root->left != nullptr) {

From 39a027043431a0e54c00ba8d92892b51467b1854 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 2 Feb 2023 11:23:09 +0530
Subject: [PATCH 30/51] Add `reference_fields` map in `Collection`.

---
 include/index.h               |  2 +-
 src/collection.cpp            | 12 ++++++------
 test/collection_join_test.cpp |  3 ++-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/index.h b/include/index.h
index fdc059d7..92b9f7af 100644
--- a/include/index.h
+++ b/include/index.h
@@ -708,7 +708,7 @@ public:
 
     void do_reference_filtering_with_lock(std::pair<uint32_t, uint32_t*>& reference_index_ids,
                                           filter_node_t* filter_tree_root,
-                                          const std::string& reference_field_name) const;
+                                          const std::string& reference_helper_field_name) const;
 
     void refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields);
 
diff --git a/src/collection.cpp b/src/collection.cpp
index 3a740723..c3ef5a49 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -2586,11 +2586,10 @@ Option<bool> Collection::get_reference_filter_ids(const std::string & filter_que
     std::shared_lock lock(mutex);
 
     std::string reference_field_name;
-    for (auto const& field: fields) {
-        if (!field.reference.empty() &&
-            field.reference.find(collection_name) == 0 &&
-            field.reference.find('.') == collection_name.size()) {
-            reference_field_name = field.name;
+    for (auto const& pair: reference_fields) {
+        auto reference_pair = pair.second;
+        if (reference_pair.collection == collection_name) {
+            reference_field_name = reference_pair.field;
             break;
         }
     }
@@ -2608,7 +2607,8 @@ Option<bool> Collection::get_reference_filter_ids(const std::string & filter_que
         return filter_op;
     }
 
-    reference_field_name += "_sequence_id";
+    // Reference helper field has the sequence id of other collection's documents.
+    reference_field_name += REFERENCE_HELPER_FIELD_SUFFIX;
     index->do_reference_filtering_with_lock(reference_index_ids, filter_tree_root, reference_field_name);
 
     delete filter_tree_root;
diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index 9db121aa..b25439e6 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -265,6 +265,7 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
         }
         ASSERT_TRUE(add_op.ok());
     }
+
     collectionManager.drop_collection("Customers");
     customers_schema_json =
             R"({
@@ -708,4 +709,4 @@ TEST_F(CollectionJoinTest, IncludeFieldsByReference_SingleMatch) {
 //    // 3 fields in Products document and 2 fields from Customers document
 //    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
 //    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id_sequence_id"));
-}
\ No newline at end of file
+}

From 82dddd3b6dd4ac4f4e9b1cbca03434f770020b43 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 3 Feb 2023 14:30:17 +0530
Subject: [PATCH 31/51] Fix double locking of collection mutex.

---
 test/collection_join_test.cpp | 142 ++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)

diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index b25439e6..9f22cdda 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -710,3 +710,145 @@ TEST_F(CollectionJoinTest, IncludeFieldsByReference_SingleMatch) {
 //    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
 //    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id_sequence_id"));
 }
+
+TEST_F(CollectionJoinTest, FilterByReferenceField_MultipleMatch) {
+    auto schema_json =
+            R"({
+                "name": "Users",
+                "fields": [
+                    {"name": "user_id", "type": "string"},
+                    {"name": "user_name", "type": "string"}
+                ]
+            })"_json;
+    std::vector<nlohmann::json> documents = {
+            R"({
+                "user_id": "user_a",
+                "user_name": "Roshan"
+            })"_json,
+            R"({
+                "user_id": "user_b",
+                "user_name": "Ruby"
+            })"_json,
+            R"({
+                "user_id": "user_c",
+                "user_name": "Joe"
+            })"_json,
+            R"({
+                "user_id": "user_d",
+                "user_name": "Aby"
+            })"_json
+    };
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    schema_json =
+            R"({
+                "name": "Repos",
+                "fields": [
+                    {"name": "repo_id", "type": "string"},
+                    {"name": "repo_content", "type": "string"}
+                ]
+            })"_json;
+    documents = {
+            R"({
+                "repo_id": "repo_a",
+                "repo_content": "body1"
+            })"_json,
+            R"({
+                "repo_id": "repo_b",
+                "repo_content": "body2"
+            })"_json,
+            R"({
+                "repo_id": "repo_c",
+                "repo_content": "body3"
+            })"_json
+    };
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    schema_json =
+            R"({
+                "name": "Links",
+                "fields": [
+                    {"name": "repo_id", "type": "string", "reference": "Repos.repo_id"},
+                    {"name": "user_id", "type": "string", "reference": "Users.user_id"}
+                ]
+            })"_json;
+    documents = {
+            R"({
+                "repo_id": "repo_a",
+                "user_id": "user_b"
+            })"_json,
+            R"({
+                "repo_id": "repo_a",
+                "user_id": "user_c"
+            })"_json,
+            R"({
+                "repo_id": "repo_b",
+                "user_id": "user_a"
+            })"_json,
+            R"({
+                "repo_id": "repo_b",
+                "user_id": "user_b"
+            })"_json,
+            R"({
+                "repo_id": "repo_b",
+                "user_id": "user_d"
+            })"_json,
+            R"({
+                "repo_id": "repo_c",
+                "user_id": "user_a"
+            })"_json,
+            R"({
+                "repo_id": "repo_c",
+                "user_id": "user_b"
+            })"_json,
+            R"({
+                "repo_id": "repo_c",
+                "user_id": "user_c"
+            })"_json,
+            R"({
+                "repo_id": "repo_c",
+                "user_id": "user_d"
+            })"_json
+    };
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    auto coll = collectionManager.get_collection("Users");
+
+    // Search for users linked to repo_b
+    auto result = coll->search("R", {"user_name"}, "$Links(repo_id:=repo_b)", {}, {}, {0},
+                               10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD).get();
+
+    ASSERT_EQ(2, result["found"].get<size_t>());
+    ASSERT_EQ(2, result["hits"].size());
+    ASSERT_EQ("user_b", result["hits"][0]["document"]["user_id"].get<std::string>());
+    ASSERT_EQ("user_a", result["hits"][1]["document"]["user_id"].get<std::string>());
+
+//    collectionManager.drop_collection("Users");
+//    collectionManager.drop_collection("Repos");
+//    collectionManager.drop_collection("Links");
+}
\ No newline at end of file

From 0059f8d3fb95a978cbb774f5d31c9d70e63b7706 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Tue, 7 Feb 2023 10:53:18 +0530
Subject: [PATCH 32/51] Fix tests.

---
 test/collection_join_test.cpp | 142 ----------------------------------
 1 file changed, 142 deletions(-)

diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index 9f22cdda..b25439e6 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -710,145 +710,3 @@ TEST_F(CollectionJoinTest, IncludeFieldsByReference_SingleMatch) {
 //    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
 //    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id_sequence_id"));
 }
-
-TEST_F(CollectionJoinTest, FilterByReferenceField_MultipleMatch) {
-    auto schema_json =
-            R"({
-                "name": "Users",
-                "fields": [
-                    {"name": "user_id", "type": "string"},
-                    {"name": "user_name", "type": "string"}
-                ]
-            })"_json;
-    std::vector<nlohmann::json> documents = {
-            R"({
-                "user_id": "user_a",
-                "user_name": "Roshan"
-            })"_json,
-            R"({
-                "user_id": "user_b",
-                "user_name": "Ruby"
-            })"_json,
-            R"({
-                "user_id": "user_c",
-                "user_name": "Joe"
-            })"_json,
-            R"({
-                "user_id": "user_d",
-                "user_name": "Aby"
-            })"_json
-    };
-    auto collection_create_op = collectionManager.create_collection(schema_json);
-    ASSERT_TRUE(collection_create_op.ok());
-    for (auto const &json: documents) {
-        auto add_op = collection_create_op.get()->add(json.dump());
-        if (!add_op.ok()) {
-            LOG(INFO) << add_op.error();
-        }
-        ASSERT_TRUE(add_op.ok());
-    }
-
-    schema_json =
-            R"({
-                "name": "Repos",
-                "fields": [
-                    {"name": "repo_id", "type": "string"},
-                    {"name": "repo_content", "type": "string"}
-                ]
-            })"_json;
-    documents = {
-            R"({
-                "repo_id": "repo_a",
-                "repo_content": "body1"
-            })"_json,
-            R"({
-                "repo_id": "repo_b",
-                "repo_content": "body2"
-            })"_json,
-            R"({
-                "repo_id": "repo_c",
-                "repo_content": "body3"
-            })"_json
-    };
-    collection_create_op = collectionManager.create_collection(schema_json);
-    ASSERT_TRUE(collection_create_op.ok());
-    for (auto const &json: documents) {
-        auto add_op = collection_create_op.get()->add(json.dump());
-        if (!add_op.ok()) {
-            LOG(INFO) << add_op.error();
-        }
-        ASSERT_TRUE(add_op.ok());
-    }
-
-    schema_json =
-            R"({
-                "name": "Links",
-                "fields": [
-                    {"name": "repo_id", "type": "string", "reference": "Repos.repo_id"},
-                    {"name": "user_id", "type": "string", "reference": "Users.user_id"}
-                ]
-            })"_json;
-    documents = {
-            R"({
-                "repo_id": "repo_a",
-                "user_id": "user_b"
-            })"_json,
-            R"({
-                "repo_id": "repo_a",
-                "user_id": "user_c"
-            })"_json,
-            R"({
-                "repo_id": "repo_b",
-                "user_id": "user_a"
-            })"_json,
-            R"({
-                "repo_id": "repo_b",
-                "user_id": "user_b"
-            })"_json,
-            R"({
-                "repo_id": "repo_b",
-                "user_id": "user_d"
-            })"_json,
-            R"({
-                "repo_id": "repo_c",
-                "user_id": "user_a"
-            })"_json,
-            R"({
-                "repo_id": "repo_c",
-                "user_id": "user_b"
-            })"_json,
-            R"({
-                "repo_id": "repo_c",
-                "user_id": "user_c"
-            })"_json,
-            R"({
-                "repo_id": "repo_c",
-                "user_id": "user_d"
-            })"_json
-    };
-    collection_create_op = collectionManager.create_collection(schema_json);
-    ASSERT_TRUE(collection_create_op.ok());
-
-    for (auto const &json: documents) {
-        auto add_op = collection_create_op.get()->add(json.dump());
-        if (!add_op.ok()) {
-            LOG(INFO) << add_op.error();
-        }
-        ASSERT_TRUE(add_op.ok());
-    }
-
-    auto coll = collectionManager.get_collection("Users");
-
-    // Search for users linked to repo_b
-    auto result = coll->search("R", {"user_name"}, "$Links(repo_id:=repo_b)", {}, {}, {0},
-                               10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD).get();
-
-    ASSERT_EQ(2, result["found"].get<size_t>());
-    ASSERT_EQ(2, result["hits"].size());
-    ASSERT_EQ("user_b", result["hits"][0]["document"]["user_id"].get<std::string>());
-    ASSERT_EQ("user_a", result["hits"][1]["document"]["user_id"].get<std::string>());
-
-//    collectionManager.drop_collection("Users");
-//    collectionManager.drop_collection("Repos");
-//    collectionManager.drop_collection("Links");
-}
\ No newline at end of file

From 0c8edf941f01f2e5bf15e19df04b85817c7ff5c1 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 9 Feb 2023 11:50:58 +0530
Subject: [PATCH 33/51] Reference `include_fields`.

---
 include/collection.h |  2 ++
 src/collection.cpp   | 25 ++++++++++++++++++-------
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/include/collection.h b/include/collection.h
index 38e91d1f..f0c16236 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -463,6 +463,8 @@ public:
 
     Option<bool> validate_reference_filter(const std::string& filter_query) const;
 
+    Option<std::string> get_reference_field(const std::string & collection_name) const;
+
     Option<bool> get_reference_filter_ids(const std::string & filter_query,
                                           const std::string & collection_name,
                                           std::pair<uint32_t, uint32_t*>& reference_index_ids) const;
diff --git a/src/collection.cpp b/src/collection.cpp
index c3ef5a49..6c61a24c 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -2580,9 +2580,7 @@ Option<bool> Collection::validate_reference_filter(const std::string& filter_que
     return Option<bool>(true);
 }
 
-Option<bool> Collection::get_reference_filter_ids(const std::string & filter_query,
-                                                  const std::string & collection_name,
-                                                  std::pair<uint32_t, uint32_t*>& reference_index_ids) const {
+Option<std::string> Collection::get_reference_field(const std::string & collection_name) const {
     std::shared_lock lock(mutex);
 
     std::string reference_field_name;
@@ -2595,10 +2593,23 @@ Option<bool> Collection::get_reference_filter_ids(const std::string & filter_que
     }
 
     if (reference_field_name.empty()) {
-        return Option<bool>(400, "Could not find any field in `" + name + "` referencing the collection `"
-                                                    + collection_name + "`.");
+        return Option<std::string>(400, "Could not find any field in `" + name + "` referencing the collection `"
+                                 + collection_name + "`.");
     }
 
+    return Option(reference_field_name);
+}
+
+Option<bool> Collection::get_reference_filter_ids(const std::string & filter_query,
+                                                  const std::string & collection_name,
+                                                  std::pair<uint32_t, uint32_t*>& reference_index_ids) const {
+    auto reference_field_op = get_reference_field(collection_name);
+    if (!reference_field_op.ok()) {
+        return Option<bool>(reference_field_op.code(), reference_field_op.error());
+    }
+
+    std::shared_lock lock(mutex);
+
     const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
     filter_node_t* filter_tree_root = nullptr;
     Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
@@ -2608,8 +2619,8 @@ Option<bool> Collection::get_reference_filter_ids(const std::string & filter_que
     }
 
     // Reference helper field has the sequence id of other collection's documents.
-    reference_field_name += REFERENCE_HELPER_FIELD_SUFFIX;
-    index->do_reference_filtering_with_lock(reference_index_ids, filter_tree_root, reference_field_name);
+    auto field_name = reference_field_op.get() + REFERENCE_HELPER_FIELD_SUFFIX;
+    index->do_reference_filtering_with_lock(reference_index_ids, filter_tree_root, field_name);
 
     delete filter_tree_root;
     return Option<bool>(true);

From d2bc921f1d65fca4ca7cb57e237735cbc98710fb Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 9 Feb 2023 12:25:16 +0530
Subject: [PATCH 34/51] Fix memory leak in `Collection::prune_doc`.

---
 src/collection.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/collection.cpp b/src/collection.cpp
index 6c61a24c..1026bb54 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -3999,6 +3999,8 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
             reference_docs.push_back(ref_doc);
         }
 
+        delete[] documents[0].second;
+
         for (const auto &ref_doc: reference_docs) {
             doc.update(ref_doc);
         }

From 94add54c4354db6dd5213edf2ab212fa18a5e8b0 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Wed, 15 Feb 2023 16:48:44 +0530
Subject: [PATCH 35/51] Use `filter_result_t` in `get_reference_filter_ids`.

---
 include/collection.h |  4 ++--
 include/index.h      |  8 --------
 src/collection.cpp   | 19 ++++++++++---------
 3 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/include/collection.h b/include/collection.h
index f0c16236..ab847a4c 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -466,8 +466,8 @@ public:
     Option<std::string> get_reference_field(const std::string & collection_name) const;
 
     Option<bool> get_reference_filter_ids(const std::string & filter_query,
-                                          const std::string & collection_name,
-                                          std::pair<uint32_t, uint32_t*>& reference_index_ids) const;
+                                          filter_result_t& filter_result,
+                                          const std::string & collection_name) const;
 
     Option<bool> validate_reference_filter(const std::string& filter_query) const;
 
diff --git a/include/index.h b/include/index.h
index 92b9f7af..0ce10daf 100644
--- a/include/index.h
+++ b/include/index.h
@@ -702,14 +702,6 @@ public:
                                                   const std::string& collection_name,
                                                   const std::string& reference_helper_field_name) const;
 
-    Option<bool> do_reference_filtering_with_lock(filter_node_t* const filter_tree_root,
-                                                  filter_result_t& filter_result,
-                                                  const std::string & reference_helper_field_name) const;
-
-    void do_reference_filtering_with_lock(std::pair<uint32_t, uint32_t*>& reference_index_ids,
-                                          filter_node_t* filter_tree_root,
-                                          const std::string& reference_helper_field_name) const;
-
     void refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields);
 
     // the following methods are not synchronized because their parent calls are synchronized or they are const/static
diff --git a/src/collection.cpp b/src/collection.cpp
index 1026bb54..45ae966a 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -2601,8 +2601,8 @@ Option<std::string> Collection::get_reference_field(const std::string & collecti
 }
 
 Option<bool> Collection::get_reference_filter_ids(const std::string & filter_query,
-                                                  const std::string & collection_name,
-                                                  std::pair<uint32_t, uint32_t*>& reference_index_ids) const {
+                                                  filter_result_t& filter_result,
+                                                  const std::string & collection_name) const {
     auto reference_field_op = get_reference_field(collection_name);
     if (!reference_field_op.ok()) {
         return Option<bool>(reference_field_op.code(), reference_field_op.error());
@@ -2612,15 +2612,18 @@ Option<bool> Collection::get_reference_filter_ids(const std::string & filter_que
 
     const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
     filter_node_t* filter_tree_root = nullptr;
-    Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
-                                                        store, doc_id_prefix, filter_tree_root);
-    if(!filter_op.ok()) {
-        return filter_op;
+    Option<bool> parse_op = filter::parse_filter_query(filter_query, search_schema,
+                                                       store, doc_id_prefix, filter_tree_root);
+    if(!parse_op.ok()) {
+        return parse_op;
     }
 
     // Reference helper field has the sequence id of other collection's documents.
     auto field_name = reference_field_op.get() + REFERENCE_HELPER_FIELD_SUFFIX;
-    index->do_reference_filtering_with_lock(reference_index_ids, filter_tree_root, field_name);
+    auto filter_op = index->do_reference_filtering_with_lock(filter_tree_root, filter_result, field_name);
+    if (!filter_op.ok()) {
+        return filter_op;
+    }
 
     delete filter_tree_root;
     return Option<bool>(true);
@@ -3999,8 +4002,6 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
             reference_docs.push_back(ref_doc);
         }
 
-        delete[] documents[0].second;
-
         for (const auto &ref_doc: reference_docs) {
             doc.update(ref_doc);
         }

From 2672b1ebd6e5b94bc626e564ce12775f794b757e Mon Sep 17 00:00:00 2001
From: ozanarmagan <ozan-armagan@yandex.com>
Date: Thu, 16 Feb 2023 14:45:43 +0300
Subject: [PATCH 36/51] Auto vector generation & Hybrid Search

---
 WORKSPACE                               |   5 +-
 bazel/foreign_cc.patch                  |   4 +-
 bazel/foreign_cc_version_compiler.patch | 283 ------------------------
 cmake/patch.sh                          |  18 ++
 include/collection.h                    |   8 -
 include/field.h                         |   4 +-
 src/collection.cpp                      |  66 +-----
 src/field.cpp                           |   2 +-
 8 files changed, 25 insertions(+), 365 deletions(-)
 delete mode 100644 bazel/foreign_cc_version_compiler.patch
 create mode 100644 cmake/patch.sh

diff --git a/WORKSPACE b/WORKSPACE
index 94d24423..1dc1401d 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -13,10 +13,7 @@ bazel_compdb_deps()
 
 http_archive(
     name = "rules_foreign_cc",
-    patches = ["//bazel:foreign_cc.patch", "//bazel:foreign_cc_version_compiler.patch"],
-    patch_args = [
-        "-p1",
-    ],
+    patches = ["//bazel:foreign_cc.patch"],
     sha256 = "2a4d07cd64b0719b39a7c12218a3e507672b82a97b98c6a89d38565894cf7c51",
     strip_prefix = "rules_foreign_cc-0.9.0",
     url = "https://github.com/bazelbuild/rules_foreign_cc/archive/refs/tags/0.9.0.tar.gz",
diff --git a/bazel/foreign_cc.patch b/bazel/foreign_cc.patch
index 9cb52c7c..9af0c8a7 100644
--- a/bazel/foreign_cc.patch
+++ b/bazel/foreign_cc.patch
@@ -1,5 +1,5 @@
---- a/foreign_cc/private/configure_script.bzl
-+++ b/foreign_cc/private/configure_script.bzl
+--- foreign_cc/private/configure_script.bzl
++++ foreign_cc/private/configure_script.bzl
 @@ -70,7 +70,7 @@
          ).lstrip())
  
diff --git a/bazel/foreign_cc_version_compiler.patch b/bazel/foreign_cc_version_compiler.patch
deleted file mode 100644
index d668a1db..00000000
--- a/bazel/foreign_cc_version_compiler.patch
+++ /dev/null
@@ -1,283 +0,0 @@
-diff --git a/foreign_cc/private/cc_toolchain_util.bzl b/foreign_cc/private/cc_toolchain_util.bzl
-index fd7fa4d..188dc5f 100644
---- a/foreign_cc/private/cc_toolchain_util.bzl
-+++ b/foreign_cc/private/cc_toolchain_util.bzl
-@@ -265,15 +265,24 @@ def get_tools_info(ctx):
-         cc_toolchain = cc_toolchain,
-     )
-
-+    cxx = cc_common.get_tool_for_action(
-+            feature_configuration = feature_configuration,
-+            action_name = ACTION_NAMES.cpp_compile,
-+        )
-+    cxx_splitted = cxx.split("/")
-+    if(cxx_splitted[-1] == "gcc"):
-+        cxx_splitted[-1] = "g++"
-+        cxx = "/".join(cxx_splitted)
-+    if(cxx_splitted[-1] == "clang"):
-+        cxx_splitted = "clang++"
-+        cxx = "/".join(cxx_splitted)
-+
-     return CxxToolsInfo(
-         cc = cc_common.get_tool_for_action(
-             feature_configuration = feature_configuration,
-             action_name = ACTION_NAMES.c_compile,
-         ),
--        cxx = cc_common.get_tool_for_action(
--            feature_configuration = feature_configuration,
--            action_name = ACTION_NAMES.cpp_compile,
--        ),
-+        cxx = cxx,
-         cxx_linker_static = cc_common.get_tool_for_action(
-             feature_configuration = feature_configuration,
-             action_name = ACTION_NAMES.cpp_link_static_library,
-diff --git a/toolchains/built_toolchains.bzl b/toolchains/built_toolchains.bzl
-index 5e59e79..ddf63a5 100644
---- a/toolchains/built_toolchains.bzl
-+++ b/toolchains/built_toolchains.bzl
-@@ -28,6 +28,7 @@ _CMAKE_SRCS = {
-     "3.22.4": [["https://github.com/Kitware/CMake/releases/download/v3.22.4/cmake-3.22.4.tar.gz"], "cmake-3.22.4", "5c55d0b0bc4c191549e3502b8f99a4fe892077611df22b4178cc020626e22a47"],
-     "3.23.1": [["https://github.com/Kitware/CMake/releases/download/v3.23.1/cmake-3.23.1.tar.gz"], "cmake-3.23.1", "33fd10a8ec687a4d0d5b42473f10459bb92b3ae7def2b745dc10b192760869f3"],
-     "3.23.2": [["https://github.com/Kitware/CMake/releases/download/v3.23.2/cmake-3.23.2.tar.gz"], "cmake-3.23.2", "f316b40053466f9a416adf981efda41b160ca859e97f6a484b447ea299ff26aa"],
-+    "3.25.0": [["https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0.tar.gz"], "cmake-3.25.0", "306463f541555da0942e6f5a0736560f70c487178b9d94a5ae7f34d0538cdd48"],
- }
-
- # buildifier: disable=unnamed-macro
-@@ -438,6 +439,18 @@ def _ninja_toolchain(version, register_toolchains):
-         native.register_toolchains(
-             "@rules_foreign_cc//toolchains:built_ninja_toolchain",
-         )
-+    if version == "1.11.1":
-+        maybe(
-+            http_archive,
-+            name = "ninja_build_src",
-+            build_file_content = _ALL_CONTENT,
-+            sha256 = "31747ae633213f1eda3842686f83c2aa1412e0f5691d1c14dbbcc67fe7400cea",
-+            strip_prefix = "ninja-1.11.1",
-+            urls = [
-+                "https://github.com/ninja-build/ninja/archive/v1.11.1.tar.gz",
-+            ],
-+        )
-+        return
-     if version == "1.11.0":
-         maybe(
-             http_archive,
-diff --git a/toolchains/prebuilt_toolchains.bzl b/toolchains/prebuilt_toolchains.bzl
-index dabfb95..d9c38b4 100644
---- a/toolchains/prebuilt_toolchains.bzl
-+++ b/toolchains/prebuilt_toolchains.bzl
-@@ -67,6 +67,115 @@ def prebuilt_toolchains(cmake_version, ninja_version, register_toolchains):
-     _make_toolchains(register_toolchains)
-
- def _cmake_toolchains(version, register_toolchains):
-+    if "3.25.0" == version:
-+        maybe(
-+            http_archive,
-+            name = "cmake-3.25.0-linux-aarch64",
-+            urls = [
-+                "https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0-linux-aarch64.tar.gz",
-+            ],
-+            sha256 = "27da36d6debe9b30f5c498554ae40cd621a55736f5f2ae2618ed95722a59965a",
-+            strip_prefix = "cmake-3.25.0-linux-aarch64",
-+            build_file_content = _CMAKE_BUILD_FILE.format(
-+                bin = "cmake",
-+                env = "{}",
-+            ),
-+        )
-+
-+        maybe(
-+            http_archive,
-+            name = "cmake-3.25.0-linux-x86_64",
-+            urls = [
-+                "https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0-linux-x86_64.tar.gz",
-+            ],
-+            sha256 = "ac634d6f0a81d7089adc7be5acff66a6bee3b08615f9a947858ce92a9ef59c8b",
-+            strip_prefix = "cmake-3.25.0-linux-x86_64",
-+            build_file_content = _CMAKE_BUILD_FILE.format(
-+                bin = "cmake",
-+                env = "{}",
-+            ),
-+        )
-+
-+        maybe(
-+            http_archive,
-+            name = "cmake-3.25.0-macos-universal",
-+            urls = [
-+                "https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0-macos-universal.tar.gz",
-+            ],
-+            sha256 = "c088e761534a2078cd9d0581d39f02d3f9ed05302e33135b55c6d619b263b4c3",
-+            strip_prefix = "cmake-3.25.0-macos-universal/CMake.app/Contents",
-+            build_file_content = _CMAKE_BUILD_FILE.format(
-+                bin = "cmake",
-+                env = "{}",
-+            ),
-+        )
-+
-+        maybe(
-+            http_archive,
-+            name = "cmake-3.25.0-windows-i386",
-+            urls = [
-+                "https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0-windows-i386.zip",
-+            ],
-+            sha256 = "ddd115257a19ff3dd18fc63f32a00ae742f8b62d2e39bc354629903512f99783",
-+            strip_prefix = "cmake-3.25.0-windows-i386",
-+            build_file_content = _CMAKE_BUILD_FILE.format(
-+                bin = "cmake.exe",
-+                env = "{}",
-+            ),
-+        )
-+
-+        maybe(
-+            http_archive,
-+            name = "cmake-3.25.0-windows-x86_64",
-+            urls = [
-+                "https://github.com/Kitware/CMake/releases/download/v3.25.0/cmake-3.25.0-windows-x86_64.zip",
-+            ],
-+            sha256 = "b46030c10cab1170355952f9ac59f7e6dabc248070fc53f15dff11d4ed2910f8",
-+            strip_prefix = "cmake-3.25.0-windows-x86_64",
-+            build_file_content = _CMAKE_BUILD_FILE.format(
-+                bin = "cmake.exe",
-+                env = "{}",
-+            ),
-+        )
-+
-+        # buildifier: leave-alone
-+        maybe(
-+            prebuilt_toolchains_repository,
-+            name = "cmake_3.25.0_toolchains",
-+            repos = {
-+                "cmake-3.25.0-linux-aarch64": [
-+                    "@platforms//cpu:aarch64",
-+                    "@platforms//os:linux",
-+                ],
-+                "cmake-3.25.0-linux-x86_64": [
-+                    "@platforms//cpu:x86_64",
-+                    "@platforms//os:linux",
-+                ],
-+                "cmake-3.25.0-macos-universal": [
-+                    "@platforms//os:macos",
-+                ],
-+                "cmake-3.25.0-windows-i386": [
-+                    "@platforms//cpu:x86_32",
-+                    "@platforms//os:windows",
-+                ],
-+                "cmake-3.25.0-windows-x86_64": [
-+                    "@platforms//cpu:x86_64",
-+                    "@platforms//os:windows",
-+                ],
-+            },
-+            tool = "cmake",
-+        )
-+
-+        if register_toolchains:
-+            native.register_toolchains(
-+                "@cmake_3.25.0_toolchains//:cmake-3.25.0-linux-aarch64_toolchain",
-+                "@cmake_3.25.0_toolchains//:cmake-3.25.0-linux-x86_64_toolchain",
-+                "@cmake_3.25.0_toolchains//:cmake-3.25.0-macos-universal_toolchain",
-+                "@cmake_3.25.0_toolchains//:cmake-3.25.0-windows-i386_toolchain",
-+                "@cmake_3.25.0_toolchains//:cmake-3.25.0-windows-x86_64_toolchain",
-+            )
-+
-+        return
-     if "3.23.2" == version:
-         maybe(
-             http_archive,
-@@ -4196,6 +4305,78 @@ def _cmake_toolchains(version, register_toolchains):
-     fail("Unsupported version: " + str(version))
-
- def _ninja_toolchains(version, register_toolchains):
-+    if "1.11.1" == version:
-+        maybe(
-+            http_archive,
-+            name = "ninja_1.11.1_linux",
-+            urls = [
-+                "https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-linux.zip",
-+            ],
-+            sha256 = "b901ba96e486dce377f9a070ed4ef3f79deb45f4ffe2938f8e7ddc69cfb3df77",
-+            strip_prefix = "",
-+            build_file_content = _NINJA_BUILD_FILE.format(
-+                bin = "ninja",
-+                env = "{\"NINJA\": \"$(execpath :ninja_bin)\"}",
-+            ),
-+        )
-+
-+        maybe(
-+            http_archive,
-+            name = "ninja_1.11.1_mac",
-+            urls = [
-+                "https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-mac.zip",
-+            ],
-+            sha256 = "482ecb23c59ae3d4f158029112de172dd96bb0e97549c4b1ca32d8fad11f873e",
-+            strip_prefix = "",
-+            build_file_content = _NINJA_BUILD_FILE.format(
-+                bin = "ninja",
-+                env = "{\"NINJA\": \"$(execpath :ninja_bin)\"}",
-+            ),
-+        )
-+
-+        maybe(
-+            http_archive,
-+            name = "ninja_1.11.1_win",
-+            urls = [
-+                "https://github.com/ninja-build/ninja/releases/download/v1.11.1/ninja-win.zip",
-+            ],
-+            sha256 = "524b344a1a9a55005eaf868d991e090ab8ce07fa109f1820d40e74642e289abc",
-+            strip_prefix = "",
-+            build_file_content = _NINJA_BUILD_FILE.format(
-+                bin = "ninja.exe",
-+                env = "{\"NINJA\": \"$(execpath :ninja_bin)\"}",
-+            ),
-+        )
-+
-+        # buildifier: leave-alone
-+        maybe(
-+            prebuilt_toolchains_repository,
-+            name = "ninja_1.11.1_toolchains",
-+            repos = {
-+                "ninja_1.11.1_linux": [
-+                    "@platforms//cpu:x86_64",
-+                    "@platforms//os:linux",
-+                ],
-+                "ninja_1.11.1_mac": [
-+                    "@platforms//cpu:x86_64",
-+                    "@platforms//os:macos",
-+                ],
-+                "ninja_1.11.1_win": [
-+                    "@platforms//cpu:x86_64",
-+                    "@platforms//os:windows",
-+                ],
-+            },
-+            tool = "ninja",
-+        )
-+
-+        if register_toolchains:
-+            native.register_toolchains(
-+                "@ninja_1.11.1_toolchains//:ninja_1.11.1_linux_toolchain",
-+                "@ninja_1.11.1_toolchains//:ninja_1.11.1_mac_toolchain",
-+                "@ninja_1.11.1_toolchains//:ninja_1.11.1_win_toolchain",
-+            )
-+
-+        return
-     if "1.11.0" == version:
-         maybe(
-             http_archive,
-diff --git a/toolchains/prebuilt_toolchains.py b/toolchains/prebuilt_toolchains.py
-index 5288b27..a193021 100755
---- a/toolchains/prebuilt_toolchains.py
-+++ b/toolchains/prebuilt_toolchains.py
-@@ -10,6 +10,7 @@ CMAKE_SHA256_URL_TEMPLATE = "https://cmake.org/files/v{minor}/cmake-{full}-SHA-2
- CMAKE_URL_TEMPLATE = "https://github.com/Kitware/CMake/releases/download/v{full}/{file}"
-
- CMAKE_VERSIONS = [
-+    "3.25.0",
-     "3.23.2",
-     "3.23.1",
-     "3.22.4",
-@@ -116,6 +117,7 @@ NINJA_TARGETS = {
- }
-
- NINJA_VERSIONS = (
-+    "1.11.1",
-     "1.10.2",
-     "1.10.1",
-     "1.10.0",
diff --git a/cmake/patch.sh b/cmake/patch.sh
new file mode 100644
index 00000000..410c1254
--- /dev/null
+++ b/cmake/patch.sh
@@ -0,0 +1,18 @@
+#! /bin/sh
+
+set +x
+set -euo pipefail
+
+
+patch="$1"; shift
+
+# ignore the error if the patch is already applied
+if ! out=$(patch -p1 -N -r "rejects.bin" < "$patch")
+then
+    echo "$out" | grep -q "Reversed (or previously applied) patch detected!  Skipping patch."
+    test -s "rejects.bin" # Make sure we have rejects.
+else
+    test -f "rejects.bin" && ! test -s "rejects.bin" # Make sure we have no rejects.
+fi
+
+rm -f "rejects.bin"
\ No newline at end of file
diff --git a/include/collection.h b/include/collection.h
index ab847a4c..8d77cfde 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -463,14 +463,6 @@ public:
 
     Option<bool> validate_reference_filter(const std::string& filter_query) const;
 
-    Option<std::string> get_reference_field(const std::string & collection_name) const;
-
-    Option<bool> get_reference_filter_ids(const std::string & filter_query,
-                                          filter_result_t& filter_result,
-                                          const std::string & collection_name) const;
-
-    Option<bool> validate_reference_filter(const std::string& filter_query) const;
-
     Option<nlohmann::json> get(const std::string & id) const;
 
     Option<std::string> remove(const std::string & id, bool remove_from_store = true);
diff --git a/include/field.h b/include/field.h
index 63feff0f..305675ab 100644
--- a/include/field.h
+++ b/include/field.h
@@ -411,7 +411,7 @@ struct field {
 
     static Option<bool> json_field_to_field(bool enable_nested_fields, nlohmann::json& field_json,
                                             std::vector<field>& the_fields,
-                                            string& fallback_field_type, size_t& num_auto_detect_fields);
+                                            string& fallback_field_type, size_t& num_auto_detect_fields,const nlohmann::json& all_fields_json = nlohmann::json());
 
     static Option<bool> json_fields_to_fields(bool enable_nested_fields,
                                               nlohmann::json& fields_json,
@@ -475,7 +475,7 @@ struct field {
             }
 
             auto op = json_field_to_field(enable_nested_fields,
-                                          field_json, the_fields, fallback_field_type, num_auto_detect_fields);
+                                          field_json, the_fields, fallback_field_type, num_auto_detect_fields, fields_json);
             if(!op.ok()) {
                 return op;
             }
diff --git a/src/collection.cpp b/src/collection.cpp
index 45ae966a..af35b70d 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -246,6 +246,7 @@ nlohmann::json Collection::get_summary_json() const {
             field_json[fields::reference] = coll_field.reference;
         }
 
+
         fields_arr.push_back(field_json);
     }
 
@@ -2580,71 +2581,6 @@ Option<bool> Collection::validate_reference_filter(const std::string& filter_que
     return Option<bool>(true);
 }
 
-Option<std::string> Collection::get_reference_field(const std::string & collection_name) const {
-    std::shared_lock lock(mutex);
-
-    std::string reference_field_name;
-    for (auto const& pair: reference_fields) {
-        auto reference_pair = pair.second;
-        if (reference_pair.collection == collection_name) {
-            reference_field_name = reference_pair.field;
-            break;
-        }
-    }
-
-    if (reference_field_name.empty()) {
-        return Option<std::string>(400, "Could not find any field in `" + name + "` referencing the collection `"
-                                 + collection_name + "`.");
-    }
-
-    return Option(reference_field_name);
-}
-
-Option<bool> Collection::get_reference_filter_ids(const std::string & filter_query,
-                                                  filter_result_t& filter_result,
-                                                  const std::string & collection_name) const {
-    auto reference_field_op = get_reference_field(collection_name);
-    if (!reference_field_op.ok()) {
-        return Option<bool>(reference_field_op.code(), reference_field_op.error());
-    }
-
-    std::shared_lock lock(mutex);
-
-    const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
-    filter_node_t* filter_tree_root = nullptr;
-    Option<bool> parse_op = filter::parse_filter_query(filter_query, search_schema,
-                                                       store, doc_id_prefix, filter_tree_root);
-    if(!parse_op.ok()) {
-        return parse_op;
-    }
-
-    // Reference helper field has the sequence id of other collection's documents.
-    auto field_name = reference_field_op.get() + REFERENCE_HELPER_FIELD_SUFFIX;
-    auto filter_op = index->do_reference_filtering_with_lock(filter_tree_root, filter_result, field_name);
-    if (!filter_op.ok()) {
-        return filter_op;
-    }
-
-    delete filter_tree_root;
-    return Option<bool>(true);
-}
-
-Option<bool> Collection::validate_reference_filter(const std::string& filter_query) const {
-    std::shared_lock lock(mutex);
-
-    const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
-    filter_node_t* filter_tree_root = nullptr;
-    Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
-                                                        store, doc_id_prefix, filter_tree_root);
-
-    if(!filter_op.ok()) {
-        return filter_op;
-    }
-
-    delete filter_tree_root;
-    return Option<bool>(true);
-}
-
 bool Collection::facet_value_to_string(const facet &a_facet, const facet_count_t &facet_count,
                                        const nlohmann::json &document, std::string &value) const {
 
diff --git a/src/field.cpp b/src/field.cpp
index 129c7512..be2000b0 100644
--- a/src/field.cpp
+++ b/src/field.cpp
@@ -523,7 +523,7 @@ Option<bool> filter::parse_filter_query(const std::string& filter_query,
 
 Option<bool> field::json_field_to_field(bool enable_nested_fields, nlohmann::json& field_json,
                                         std::vector<field>& the_fields,
-                                        string& fallback_field_type, size_t& num_auto_detect_fields) {
+                                        string& fallback_field_type, size_t& num_auto_detect_fields, const nlohmann::json& all_fields_json) {
 
     if(field_json["name"] == "id") {
         // No field should exist with the name "id" as it is reserved for internal use

From aee771cebcb2a2afefd36c25ba534aa47d454281 Mon Sep 17 00:00:00 2001
From: ozanarmagan <ozan-armagan@yandex.com>
Date: Tue, 21 Feb 2023 13:02:47 +0300
Subject: [PATCH 37/51] Review Changes

---
 include/field.h | 5 ++---
 src/field.cpp   | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/include/field.h b/include/field.h
index 305675ab..18a1d4b7 100644
--- a/include/field.h
+++ b/include/field.h
@@ -11,7 +11,6 @@
 #include <tsl/htrie_map.h>
 #include "json.hpp"
 #include "text_embedder_manager.h"
-#include <regex>
 
 namespace field_types {
     // first field value indexed will determine the type
@@ -411,7 +410,7 @@ struct field {
 
     static Option<bool> json_field_to_field(bool enable_nested_fields, nlohmann::json& field_json,
                                             std::vector<field>& the_fields,
-                                            string& fallback_field_type, size_t& num_auto_detect_fields,const nlohmann::json& all_fields_json = nlohmann::json());
+                                            string& fallback_field_type, size_t& num_auto_detect_fields);
 
     static Option<bool> json_fields_to_fields(bool enable_nested_fields,
                                               nlohmann::json& fields_json,
@@ -475,7 +474,7 @@ struct field {
             }
 
             auto op = json_field_to_field(enable_nested_fields,
-                                          field_json, the_fields, fallback_field_type, num_auto_detect_fields, fields_json);
+                                          field_json, the_fields, fallback_field_type, num_auto_detect_fields);
             if(!op.ok()) {
                 return op;
             }
diff --git a/src/field.cpp b/src/field.cpp
index be2000b0..129c7512 100644
--- a/src/field.cpp
+++ b/src/field.cpp
@@ -523,7 +523,7 @@ Option<bool> filter::parse_filter_query(const std::string& filter_query,
 
 Option<bool> field::json_field_to_field(bool enable_nested_fields, nlohmann::json& field_json,
                                         std::vector<field>& the_fields,
-                                        string& fallback_field_type, size_t& num_auto_detect_fields, const nlohmann::json& all_fields_json) {
+                                        string& fallback_field_type, size_t& num_auto_detect_fields) {
 
     if(field_json["name"] == "id") {
         // No field should exist with the name "id" as it is reserved for internal use

From ff9cc895e2797cfb0def62d4914c7e23ef4d9773 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Tue, 7 Mar 2023 11:13:36 +0530
Subject: [PATCH 38/51] Undo static linking change.

---
 .bazelrc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.bazelrc b/.bazelrc
index 933545b7..dd960251 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -5,3 +5,5 @@ build --cxxopt="-std=c++17"
 
 test --jobs=6
 build --enable_platform_specific_config
+
+build:linux --action_env=BAZEL_LINKLIBS="-l%:libstdc++.a -l%:libgcc.a"
\ No newline at end of file

From 84787510c8b956a0152e0f2f342cc63092e9a0b7 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Tue, 7 Mar 2023 14:19:36 +0530
Subject: [PATCH 39/51] Handle reference filter during approximation.

---
 include/collection.h |  3 ++
 include/index.h      |  7 ++++
 src/collection.cpp   | 32 ++++++++++-----
 src/index.cpp        | 98 +++++++++++++++++++++++++++-----------------
 4 files changed, 92 insertions(+), 48 deletions(-)

diff --git a/include/collection.h b/include/collection.h
index 8d77cfde..4e08d4da 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -457,6 +457,9 @@ public:
 
     Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;
 
+    Option<bool> get_approximate_reference_filter_ids(const std::string& filter_query,
+                                                      uint32_t& filter_ids_length) const;
+
     Option<bool> get_reference_filter_ids(const std::string& filter_query,
                                           filter_result_t& filter_result,
                                           const std::string& collection_name) const;
diff --git a/include/index.h b/include/index.h
index 0ce10daf..f7b5f3fe 100644
--- a/include/index.h
+++ b/include/index.h
@@ -486,6 +486,10 @@ private:
                                     const int64_t& range_end_value,
                                     uint32_t& filter_ids_length) const;
 
+    Option<bool> approximate_filter_ids(const filter& a_filter,
+                                        uint32_t& filter_ids_length,
+                                        const std::string& collection_name) const;
+
     Option<bool> rearranging_recursive_filter(filter_node_t* const filter_tree_root,
                                               filter_result_t& result,
                                               const std::string& collection_name = "") const;
@@ -702,6 +706,9 @@ public:
                                                   const std::string& collection_name,
                                                   const std::string& reference_helper_field_name) const;
 
+    Option<bool> get_approximate_reference_filter_ids_with_lock(filter_node_t* const filter_tree_root,
+                                                                uint32_t& filter_ids_length) const;
+
     void refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields);
 
     // the following methods are not synchronized because their parent calls are synchronized or they are const/static
diff --git a/src/collection.cpp b/src/collection.cpp
index af35b70d..9d1c0635 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -2513,10 +2513,9 @@ Option<bool> Collection::get_filter_ids(const std::string& filter_query, filter_
         return filter_op;
     }
 
-    index->do_filtering_with_lock(filter_tree_root, filter_result, name);
+    std::unique_ptr<filter_node_t> filter_tree_root_guard(filter_tree_root);
 
-    delete filter_tree_root;
-    return Option<bool>(true);
+    return index->do_filtering_with_lock(filter_tree_root, filter_result, name);
 }
 
 Option<std::string> Collection::get_reference_field(const std::string & collection_name) const {
@@ -2537,6 +2536,23 @@ Option<std::string> Collection::get_reference_field(const std::string & collecti
     return Option(reference_field_name);
 }
 
+// Parses `filter_query` against this collection's schema and forwards the tree to the index for a match-count estimate.
+Option<bool> Collection::get_approximate_reference_filter_ids(const std::string& filter_query,
+                                                              uint32_t& filter_ids_length) const {
+    std::shared_lock lock(mutex);
+
+    const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
+    filter_node_t* filter_tree_root = nullptr;
+    Option<bool> parse_op = filter::parse_filter_query(filter_query, search_schema,
+                                                       store, doc_id_prefix, filter_tree_root);
+    if(!parse_op.ok()) {
+        return parse_op;
+    }
+    // The guard owns the parsed tree from here on, so it is deleted when this function returns.
+    std::unique_ptr<filter_node_t> filter_tree_root_guard(filter_tree_root);
+
+    return index->get_approximate_reference_filter_ids_with_lock(filter_tree_root, filter_ids_length);
+}
+
 Option<bool> Collection::get_reference_filter_ids(const std::string & filter_query,
                                                   filter_result_t& filter_result,
                                                   const std::string & collection_name) const {
@@ -2555,15 +2571,11 @@ Option<bool> Collection::get_reference_filter_ids(const std::string & filter_que
         return parse_op;
     }
 
+    std::unique_ptr<filter_node_t> filter_tree_root_guard(filter_tree_root);
+
     // Reference helper field has the sequence id of other collection's documents.
     auto field_name = reference_field_op.get() + REFERENCE_HELPER_FIELD_SUFFIX;
-    auto filter_op = index->do_reference_filtering_with_lock(filter_tree_root, filter_result, name, field_name);
-    if (!filter_op.ok()) {
-        return filter_op;
-    }
-
-    delete filter_tree_root;
-    return Option<bool>(true);
+    return index->do_reference_filtering_with_lock(filter_tree_root, filter_result, name, field_name);
 }
 
 Option<bool> Collection::validate_reference_filter(const std::string& filter_query) const {
diff --git a/src/index.cpp b/src/index.cpp
index e88badc0..b704356d 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1952,45 +1952,19 @@ void Index::aproximate_numerical_match(num_tree_t* const num_tree,
     num_tree->approx_search_count(comparator, value, filter_ids_length);
 }
 
-Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
-                                          uint32_t& filter_ids_length,
-                                          const std::string& collection_name) const {
-    if (root == nullptr) {
-        return Option(true);
+Option<bool> Index::approximate_filter_ids(const filter& a_filter,
+                                           uint32_t& filter_ids_length,
+                                           const std::string& collection_name) const {
+    if (!a_filter.referenced_collection_name.empty()) {
+        auto& cm = CollectionManager::get_instance();
+        auto collection = cm.get_collection(a_filter.referenced_collection_name);
+        if (collection == nullptr) {
+            return Option<bool>(400, "Referenced collection `" + a_filter.referenced_collection_name + "` not found.");
+        }
+
+        return collection->get_approximate_reference_filter_ids(a_filter.field_name, filter_ids_length);
     }
 
-    if (root->isOperator) {
-        uint32_t l_filter_ids_length = 0;
-        if (root->left != nullptr) {
-            auto rearrange_op = rearrange_filter_tree(root->left, l_filter_ids_length, collection_name);
-            if (!rearrange_op.ok()) {
-                return rearrange_op;
-            }
-        }
-
-        uint32_t r_filter_ids_length = 0;
-        if (root->right != nullptr) {
-            auto rearrange_op = rearrange_filter_tree(root->right, r_filter_ids_length, collection_name);
-            if (!rearrange_op.ok()) {
-                return rearrange_op;
-            }
-        }
-
-        if (root->filter_operator == AND) {
-            filter_ids_length = std::min(l_filter_ids_length, r_filter_ids_length);
-        } else {
-            filter_ids_length = l_filter_ids_length + r_filter_ids_length;
-        }
-
-        if (l_filter_ids_length > r_filter_ids_length) {
-            std::swap(root->left, root->right);
-        }
-
-        return Option(true);
-    }
-
-    auto a_filter = root->filter_exp;
-
     if (a_filter.field_name == "id") {
         filter_ids_length = a_filter.values.size();
         return Option(true);
@@ -2062,7 +2036,7 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
 
             while (tokenizer.next(str_token, token_index)) {
                 auto const leaf = (art_leaf *) art_search(t, (const unsigned char*) str_token.c_str(),
-                                                         str_token.length()+1);
+                                                          str_token.length()+1);
                 if (leaf == nullptr) {
                     continue;
                 }
@@ -2080,6 +2054,47 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
     return Option(true);
 }
 
+// Estimates the number of ids matched under `root` (stored in `filter_ids_length`) and swaps
+// the children of operator nodes so that the side with the smaller estimate is on the left.
+Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
+                                          uint32_t& filter_ids_length,
+                                          const std::string& collection_name) const {
+    if (root == nullptr) {
+        return Option(true);
+    }
+
+    if (root->isOperator) {
+        uint32_t l_filter_ids_length = 0;
+        if (root->left != nullptr) {
+            auto rearrange_op = rearrange_filter_tree(root->left, l_filter_ids_length, collection_name);
+            if (!rearrange_op.ok()) {
+                return rearrange_op;
+            }
+        }
+
+        uint32_t r_filter_ids_length = 0;
+        if (root->right != nullptr) {
+            auto rearrange_op = rearrange_filter_tree(root->right, r_filter_ids_length, collection_name);
+            if (!rearrange_op.ok()) {
+                return rearrange_op;
+            }
+        }
+
+        if (root->filter_operator == AND) {
+            filter_ids_length = std::min(l_filter_ids_length, r_filter_ids_length);
+        } else {
+            filter_ids_length = l_filter_ids_length + r_filter_ids_length;
+        }
+        if (l_filter_ids_length > r_filter_ids_length) {
+            std::swap(root->left, root->right);
+        }
+        return Option(true);
+    }
+
+    // Leaf: surface estimation errors (e.g. unknown referenced collection) to the caller.
+    return approximate_filter_ids(root->filter_exp, filter_ids_length, collection_name);
+}
+
 Option<bool> Index::rearranging_recursive_filter(filter_node_t* const filter_tree_root,
                                                  filter_result_t& result,
                                                  const std::string& collection_name) const {
@@ -2233,6 +2248,13 @@ Option<bool> Index::do_reference_filtering_with_lock(filter_node_t* const filter
     return Option(true);
 }
 
+// Takes a shared lock on the index mutex, then delegates to rearrange_filter_tree for the estimate.
+Option<bool> Index::get_approximate_reference_filter_ids_with_lock(filter_node_t* const filter_tree_root,
+                                                                   uint32_t& filter_ids_length) const {
+    std::shared_lock lock(mutex);
+    return rearrange_filter_tree(filter_tree_root, filter_ids_length);
+}
+
 Option<bool> Index::run_search(search_args* search_params, const std::string& collection_name) {
     return search(search_params->field_query_tokens,
            search_params->search_fields,

From 4ae42c45cbce6447da7c9edce8c1c141e8fddea5 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Tue, 7 Mar 2023 16:13:10 +0530
Subject: [PATCH 40/51] Delete `patch.sh`.

---
 cmake/patch.sh | 18 ------------------
 1 file changed, 18 deletions(-)
 delete mode 100644 cmake/patch.sh

diff --git a/cmake/patch.sh b/cmake/patch.sh
deleted file mode 100644
index 410c1254..00000000
--- a/cmake/patch.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#! /bin/sh
-
-set +x
-set -euo pipefail
-
-
-patch="$1"; shift
-
-# ignore the error if the patch is already applied
-if ! out=$(patch -p1 -N -r "rejects.bin" < "$patch")
-then
-    echo "$out" | grep -q "Reversed (or previously applied) patch detected!  Skipping patch."
-    test -s "rejects.bin" # Make sure we have rejects.
-else
-    test -f "rejects.bin" && ! test -s "rejects.bin" # Make sure we have no rejects.
-fi
-
-rm -f "rejects.bin"
\ No newline at end of file

From c6386b0c2f2ccc0af7cf3fe8dbbaf577ee444c8d Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Tue, 7 Mar 2023 18:28:19 +0530
Subject: [PATCH 41/51] Add tests for rearranging filter tree and approx filter
 match count.

---
 include/field.h                        |   2 +-
 test/collection_specific_more_test.cpp | 206 +++++++++++++++++++++++++
 2 files changed, 207 insertions(+), 1 deletion(-)

diff --git a/include/field.h b/include/field.h
index 18a1d4b7..7eee5c79 100644
--- a/include/field.h
+++ b/include/field.h
@@ -513,7 +513,7 @@ struct filter {
     bool apply_not_equals = false;
 
     // Would store `Foo` in case of a filter expression like `$Foo(bar := baz)`
-    std::string referenced_collection_name;
+    std::string referenced_collection_name = "";
 
     static const std::string RANGE_OPERATOR() {
         return "..";
diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp
index 070b0580..8afdaf84 100644
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@@ -1973,3 +1973,209 @@ TEST_F(CollectionSpecificMoreTest, CrossFieldTypoAndPrefixWithWeights) {
                         "<mark>", "</mark>", {2, 3}).get();
     ASSERT_EQ(1, res["hits"].size());
 }
+
+TEST_F(CollectionSpecificMoreTest, RearrangingFilterTree) {
+    nlohmann::json schema =
+            R"({
+                "name": "Collection",
+                "fields": [
+                    {"name": "name", "type": "string"},
+                    {"name": "age", "type": "int32"},
+                    {"name": "years", "type": "int32[]"},
+                    {"name": "rating", "type": "float"}
+                ]
+            })"_json;
+
+    Collection* coll = collectionManager.create_collection(schema).get();
+
+    std::ifstream infile(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
+    std::string json_line;
+    while (std::getline(infile, json_line)) {
+        auto add_op = coll->add(json_line);
+        ASSERT_TRUE(add_op.ok());
+    }
+    infile.close();
+
+    const std::string doc_id_prefix = std::to_string(coll->get_collection_id()) + "_" + Collection::DOC_ID_PREFIX + "_";
+    filter_node_t* filter_tree_root = nullptr;
+    Option<bool> filter_op = filter::parse_filter_query("years:>2000 && ((age:<30 && rating:>5) || (age:>50 && rating:<5))",
+                                                        coll->get_schema(), store, doc_id_prefix, filter_tree_root);
+    ASSERT_TRUE(filter_op.ok());
+    std::unique_ptr<filter_node_t> filter_tree_root_guard(filter_tree_root);
+
+    //           &&
+    //         /    \
+    //   years>2000  ||
+    //       4      /  \
+    //             /    &&
+    //           &&    /   \
+    //          /  \ age>50 rating<5
+    //         /    \   1        2
+    //        /      \
+    //    age<30  rating>5
+    //      2         3
+    ASSERT_TRUE(filter_tree_root != nullptr);
+    ASSERT_TRUE(filter_tree_root->isOperator);
+    ASSERT_EQ(filter_tree_root->filter_operator, AND);
+
+    auto root = filter_tree_root->left;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_FALSE(root->isOperator);
+    ASSERT_EQ(root->filter_exp.field_name, "years");
+    ASSERT_TRUE(root->left == nullptr);
+    ASSERT_TRUE(root->right == nullptr);
+
+    root = filter_tree_root->right;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_TRUE(root->isOperator);
+    ASSERT_EQ(root->filter_operator, OR);
+
+    root = filter_tree_root->right->left;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_TRUE(root->isOperator);
+    ASSERT_EQ(root->filter_operator, AND);
+
+    root = filter_tree_root->right->left->left;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_FALSE(root->isOperator);
+    ASSERT_EQ(root->filter_exp.field_name, "age");
+    ASSERT_EQ(root->filter_exp.comparators.front(), LESS_THAN);
+    ASSERT_EQ(root->filter_exp.values.front(), "30");
+    ASSERT_TRUE(root->left == nullptr);
+    ASSERT_TRUE(root->right == nullptr);
+
+    root = filter_tree_root->right->left->right;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_FALSE(root->isOperator);
+    ASSERT_EQ(root->filter_exp.field_name, "rating");
+    ASSERT_EQ(root->filter_exp.comparators.front(), GREATER_THAN);
+    ASSERT_EQ(root->filter_exp.values.front(), "5");
+    ASSERT_TRUE(root->left == nullptr);
+    ASSERT_TRUE(root->right == nullptr);
+
+    root = filter_tree_root->right->right;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_TRUE(root->isOperator);
+    ASSERT_EQ(root->filter_operator, AND);
+
+    root = filter_tree_root->right->right->left;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_FALSE(root->isOperator);
+    ASSERT_EQ(root->filter_exp.field_name, "age");
+    ASSERT_EQ(root->filter_exp.comparators.front(), GREATER_THAN);
+    ASSERT_EQ(root->filter_exp.values.front(), "50");
+    ASSERT_TRUE(root->left == nullptr);
+    ASSERT_TRUE(root->right == nullptr);
+
+    root = filter_tree_root->right->right->right;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_FALSE(root->isOperator);
+    ASSERT_EQ(root->filter_exp.field_name, "rating");
+    ASSERT_EQ(root->filter_exp.comparators.front(), LESS_THAN);
+    ASSERT_EQ(root->filter_exp.values.front(), "5");
+    ASSERT_TRUE(root->left == nullptr);
+    ASSERT_TRUE(root->right == nullptr);
+
+    filter_result_t result;
+    // Internally calls rearranging_recursive_filter
+    coll->_get_index()->do_filtering_with_lock(filter_tree_root, result);
+
+    //                 &&
+    //               /    \
+    //             ||    years>2000
+    //           /    \
+    //         &&       \
+    //       /   \        \
+    //  age>50  rating<5   &&
+    //                   /    \
+    //               age<30  rating>5
+    ASSERT_TRUE(filter_tree_root != nullptr);
+    ASSERT_TRUE(filter_tree_root->isOperator);
+    ASSERT_EQ(filter_tree_root->filter_operator, AND);
+
+    root = filter_tree_root->left;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_TRUE(root->isOperator);
+    ASSERT_EQ(root->filter_operator, OR);
+
+    root = filter_tree_root->left->left;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_TRUE(root->isOperator);
+    ASSERT_EQ(root->filter_operator, AND);
+
+    root = filter_tree_root->left->left->left;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_FALSE(root->isOperator);
+    ASSERT_EQ(root->filter_exp.field_name, "age");
+    ASSERT_EQ(root->filter_exp.comparators.front(), GREATER_THAN);
+    ASSERT_EQ(root->filter_exp.values.front(), "50");
+    ASSERT_TRUE(root->left == nullptr);
+    ASSERT_TRUE(root->right == nullptr);
+
+    root = filter_tree_root->left->left->right;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_FALSE(root->isOperator);
+    ASSERT_EQ(root->filter_exp.field_name, "rating");
+    ASSERT_EQ(root->filter_exp.comparators.front(), LESS_THAN);
+    ASSERT_EQ(root->filter_exp.values.front(), "5");
+    ASSERT_TRUE(root->left == nullptr);
+    ASSERT_TRUE(root->right == nullptr);
+
+    root = filter_tree_root->left->right;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_TRUE(root->isOperator);
+    ASSERT_EQ(root->filter_operator, AND);
+
+    root = filter_tree_root->left->right->left;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_FALSE(root->isOperator);
+    ASSERT_EQ(root->filter_exp.field_name, "age");
+    ASSERT_EQ(root->filter_exp.comparators.front(), LESS_THAN);
+    ASSERT_EQ(root->filter_exp.values.front(), "30");
+    ASSERT_TRUE(root->left == nullptr);
+    ASSERT_TRUE(root->right == nullptr);
+
+    root = filter_tree_root->left->right->right;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_FALSE(root->isOperator);
+    ASSERT_EQ(root->filter_exp.field_name, "rating");
+    ASSERT_EQ(root->filter_exp.comparators.front(), GREATER_THAN);
+    ASSERT_EQ(root->filter_exp.values.front(), "5");
+    ASSERT_TRUE(root->left == nullptr);
+    ASSERT_TRUE(root->right == nullptr);
+
+    root = filter_tree_root->right;
+    ASSERT_TRUE(root != nullptr);
+    ASSERT_FALSE(root->isOperator);
+    ASSERT_EQ(root->filter_exp.field_name, "years");
+    ASSERT_TRUE(root->left == nullptr);
+    ASSERT_TRUE(root->right == nullptr);
+}
+
+TEST_F(CollectionSpecificMoreTest, ApproxFilterMatchCount) {
+    nlohmann::json schema =
+            R"({
+                "name": "Collection",
+                "fields": [
+                    {"name": "name", "type": "string"},
+                    {"name": "age", "type": "int32"},
+                    {"name": "years", "type": "int32[]"},
+                    {"name": "rating", "type": "float"}
+                ]
+            })"_json;
+
+    Collection *coll = collectionManager.create_collection(schema).get();
+
+    std::ifstream infile(std::string(ROOT_DIR) + "test/numeric_array_documents.jsonl");
+    std::string json_line;
+    while (std::getline(infile, json_line)) {
+        auto add_op = coll->add(json_line);
+        ASSERT_TRUE(add_op.ok());
+    }
+    infile.close();
+
+    uint32_t approx_count;
+    coll->get_approximate_reference_filter_ids("years:>2000 && ((age:<30 && rating:>5) || (age:>50 && rating:<5))",
+                                               approx_count);
+    ASSERT_EQ(approx_count, 3);
+}
\ No newline at end of file

From db555d36ad4392fb9be38b181bf5f7a99416d566 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 9 Mar 2023 10:51:22 +0530
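Subject: [PATCH 42/51] Refactor tests.

Move `approximate_filter_ids`, `rearrange_filter_tree` and
`rearranging_recursive_filter` from the private to the public section of
`Index`, renaming each with a leading `_`, so that the tests can call them
directly instead of going through `do_filtering_with_lock`. Also cover
string and geopoint filters in `ApproxFilterMatchCount`, and drop the test
collection at the end of both tests.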

---
 include/index.h                        | 24 ++++++++---------
 src/index.cpp                          | 18 ++++++-------
 test/collection_specific_more_test.cpp | 36 ++++++++++++++++++++++----
 3 files changed, 52 insertions(+), 26 deletions(-)

diff --git a/include/index.h b/include/index.h
index f7b5f3fe..ff8431c8 100644
--- a/include/index.h
+++ b/include/index.h
@@ -486,14 +486,6 @@ private:
                                     const int64_t& range_end_value,
                                     uint32_t& filter_ids_length) const;
 
-    Option<bool> approximate_filter_ids(const filter& a_filter,
-                                        uint32_t& filter_ids_length,
-                                        const std::string& collection_name) const;
-
-    Option<bool> rearranging_recursive_filter(filter_node_t* const filter_tree_root,
-                                              filter_result_t& result,
-                                              const std::string& collection_name = "") const;
-
     Option<bool> recursive_filter(filter_node_t* const root,
                                   filter_result_t& result,
                                   const std::string& collection_name = "") const;
@@ -502,10 +494,6 @@ private:
                                  filter_result_t& result,
                                  const std::string& collection_name = "") const;
 
-    Option<bool> rearrange_filter_tree(filter_node_t* const root,
-                                       uint32_t& filter_ids_length,
-                                       const std::string& collection_name = "") const;
-
     void insert_doc(const int64_t score, art_tree *t, uint32_t seq_id,
                     const std::unordered_map<std::string, std::vector<uint32_t>> &token_to_offsets) const;
 
@@ -701,6 +689,18 @@ public:
                                         filter_result_t& filter_result,
                                         const std::string& collection_name = "") const;
 
+    Option<bool> _rearranging_recursive_filter(filter_node_t* const filter_tree_root,
+                                              filter_result_t& result,
+                                              const std::string& collection_name = "") const;
+
+    Option<bool> _rearrange_filter_tree(filter_node_t* const root,
+                                       uint32_t& filter_ids_length,
+                                       const std::string& collection_name = "") const;
+
+    Option<bool> _approximate_filter_ids(const filter& a_filter,
+                                        uint32_t& filter_ids_length,
+                                        const std::string& collection_name = "") const;
+
     Option<bool> do_reference_filtering_with_lock(filter_node_t* const filter_tree_root,
                                                   filter_result_t& filter_result,
                                                   const std::string& collection_name,
diff --git a/src/index.cpp b/src/index.cpp
index b704356d..124cf567 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1952,7 +1952,7 @@ void Index::aproximate_numerical_match(num_tree_t* const num_tree,
     num_tree->approx_search_count(comparator, value, filter_ids_length);
 }
 
-Option<bool> Index::approximate_filter_ids(const filter& a_filter,
+Option<bool> Index::_approximate_filter_ids(const filter& a_filter,
                                            uint32_t& filter_ids_length,
                                            const std::string& collection_name) const {
     if (!a_filter.referenced_collection_name.empty()) {
@@ -2054,7 +2054,7 @@ Option<bool> Index::approximate_filter_ids(const filter& a_filter,
     return Option(true);
 }
 
-Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
+Option<bool> Index::_rearrange_filter_tree(filter_node_t* const root,
                                           uint32_t& filter_ids_length,
                                           const std::string& collection_name) const {
     if (root == nullptr) {
@@ -2064,7 +2064,7 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
     if (root->isOperator) {
         uint32_t l_filter_ids_length = 0;
         if (root->left != nullptr) {
-            auto rearrange_op = rearrange_filter_tree(root->left, l_filter_ids_length, collection_name);
+            auto rearrange_op = _rearrange_filter_tree(root->left, l_filter_ids_length, collection_name);
             if (!rearrange_op.ok()) {
                 return rearrange_op;
             }
@@ -2072,7 +2072,7 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
 
         uint32_t r_filter_ids_length = 0;
         if (root->right != nullptr) {
-            auto rearrange_op = rearrange_filter_tree(root->right, r_filter_ids_length, collection_name);
+            auto rearrange_op = _rearrange_filter_tree(root->right, r_filter_ids_length, collection_name);
             if (!rearrange_op.ok()) {
                 return rearrange_op;
             }
@@ -2091,15 +2091,15 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
         return Option(true);
     }
 
-    approximate_filter_ids(root->filter_exp, filter_ids_length, collection_name);
+    _approximate_filter_ids(root->filter_exp, filter_ids_length, collection_name);
     return Option(true);
 }
 
-Option<bool> Index::rearranging_recursive_filter(filter_node_t* const filter_tree_root,
+Option<bool> Index::_rearranging_recursive_filter(filter_node_t* const filter_tree_root,
                                                  filter_result_t& result,
                                                  const std::string& collection_name) const {
     uint32_t filter_ids_length = 0;
-    auto rearrange_op = rearrange_filter_tree(filter_tree_root, filter_ids_length, collection_name);
+    auto rearrange_op = _rearrange_filter_tree(filter_tree_root, filter_ids_length, collection_name);
     if (!rearrange_op.ok()) {
         return rearrange_op;
     }
@@ -2189,7 +2189,7 @@ Option<bool> Index::adaptive_filter(filter_node_t* const filter_tree_root,
     metrics->and_operator_count > 0 &&
     // If there are more || in the filter tree than &&, we'll not gain much by rearranging the filter tree.
     ((float) metrics->or_operator_count / (float) metrics->and_operator_count < 0.5)) {
-        return rearranging_recursive_filter(filter_tree_root, result, collection_name);
+        return _rearranging_recursive_filter(filter_tree_root, result, collection_name);
     } else {
         return recursive_filter(filter_tree_root, result, collection_name);
     }
@@ -2252,7 +2252,7 @@ Option<bool> Index::get_approximate_reference_filter_ids_with_lock(filter_node_t
                                                                    uint32_t& filter_ids_length) const {
     std::shared_lock lock(mutex);
 
-    return rearrange_filter_tree(filter_tree_root, filter_ids_length);
+    return _rearrange_filter_tree(filter_tree_root, filter_ids_length);
 }
 
 Option<bool> Index::run_search(search_args* search_params, const std::string& collection_name) {
diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp
index 8afdaf84..b34b9973 100644
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@@ -2077,8 +2077,7 @@ TEST_F(CollectionSpecificMoreTest, RearrangingFilterTree) {
     ASSERT_TRUE(root->right == nullptr);
 
     filter_result_t result;
-    // Internally calls rearranging_recursive_filter
-    coll->_get_index()->do_filtering_with_lock(filter_tree_root, result);
+    coll->_get_index()->_rearranging_recursive_filter(filter_tree_root, result);
 
     //                 &&
     //               /    \
@@ -2150,6 +2149,8 @@ TEST_F(CollectionSpecificMoreTest, RearrangingFilterTree) {
     ASSERT_EQ(root->filter_exp.field_name, "years");
     ASSERT_TRUE(root->left == nullptr);
     ASSERT_TRUE(root->right == nullptr);
+
+    collectionManager.drop_collection("Collection");
 }
 
 TEST_F(CollectionSpecificMoreTest, ApproxFilterMatchCount) {
@@ -2160,7 +2161,8 @@ TEST_F(CollectionSpecificMoreTest, ApproxFilterMatchCount) {
                     {"name": "name", "type": "string"},
                     {"name": "age", "type": "int32"},
                     {"name": "years", "type": "int32[]"},
-                    {"name": "rating", "type": "float"}
+                    {"name": "rating", "type": "float"},
+                    {"name": "location", "type": "geopoint", "optional": true}
                 ]
             })"_json;
 
@@ -2174,8 +2176,32 @@ TEST_F(CollectionSpecificMoreTest, ApproxFilterMatchCount) {
     }
     infile.close();
 
+    const std::string doc_id_prefix = std::to_string(coll->get_collection_id()) + "_" + Collection::DOC_ID_PREFIX + "_";
+    filter_node_t* filter_tree_root = nullptr;
+    Option<bool> filter_op = filter::parse_filter_query("name: Jeremy", coll->get_schema(), store, doc_id_prefix,
+                                                        filter_tree_root);
+    ASSERT_TRUE(filter_op.ok());
+
     uint32_t approx_count;
-    coll->get_approximate_reference_filter_ids("years:>2000 && ((age:<30 && rating:>5) || (age:>50 && rating:<5))",
-                                               approx_count);
+    coll->_get_index()->_approximate_filter_ids(filter_tree_root->filter_exp, approx_count);
+    ASSERT_EQ(approx_count, 5);
+
+    delete filter_tree_root;
+    filter_op = filter::parse_filter_query("location:(48.8662, 2.3255, 48.8581, 2.3209, 48.8561, 2.3448, 48.8641, 2.3469)",
+                                           coll->get_schema(), store, doc_id_prefix, filter_tree_root);
+    ASSERT_TRUE(filter_op.ok());
+
+    coll->_get_index()->_approximate_filter_ids(filter_tree_root->filter_exp, approx_count);
+    ASSERT_EQ(approx_count, 100);
+
+    delete filter_tree_root;
+    filter_op = filter::parse_filter_query("years:>2000 && ((age:<30 && rating:>5) || (age:>50 && rating:<5))",
+                                                        coll->get_schema(), store, doc_id_prefix, filter_tree_root);
+    ASSERT_TRUE(filter_op.ok());
+
+    coll->_get_index()->_rearrange_filter_tree(filter_tree_root, approx_count);
     ASSERT_EQ(approx_count, 3);
+
+    delete filter_tree_root;
+    collectionManager.drop_collection("Collection");
 }
\ No newline at end of file

From f38f3f9792a95e2c3fdfae93fd539133e07ef4be Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 9 Mar 2023 14:08:15 +0530
Subject: [PATCH 43/51] Remove `filter_node_t::metrics`. Update function
 signatures to accept context ids.

---
 include/field.h                        |  2 -
 include/index.h                        | 26 +++++------
 include/num_tree.h                     |  4 +-
 src/field.cpp                          | 14 +-----
 src/index.cpp                          | 61 ++++++++++----------------
 src/num_tree.cpp                       |  4 +-
 test/collection_specific_more_test.cpp |  6 +--
 7 files changed, 45 insertions(+), 72 deletions(-)

diff --git a/include/field.h b/include/field.h
index 7eee5c79..dd0033eb 100644
--- a/include/field.h
+++ b/include/field.h
@@ -606,7 +606,6 @@ struct filter_node_t {
     bool isOperator;
     filter_node_t* left = nullptr;
     filter_node_t* right = nullptr;
-    filter_tree_metrics* metrics = nullptr;
 
     filter_node_t(filter filter_exp)
             : filter_exp(std::move(filter_exp)),
@@ -623,7 +622,6 @@ struct filter_node_t {
               right(right) {}
 
     ~filter_node_t() {
-        delete metrics;
         delete left;
         delete right;
     }
diff --git a/include/index.h b/include/index.h
index ff8431c8..62e75180 100644
--- a/include/index.h
+++ b/include/index.h
@@ -468,17 +468,17 @@ private:
     void numeric_not_equals_filter(num_tree_t* const num_tree,
                                    const int64_t value,
                                    const uint32_t& context_ids_length,
-                                   const uint32_t* context_ids,
+                                   uint32_t* const& context_ids,
                                    size_t& ids_len,
                                    uint32_t*& ids) const;
 
     bool field_is_indexed(const std::string& field_name) const;
 
-    Option<bool> do_filtering(filter_node_t* const root,
-                              filter_result_t& result,
-                              const std::string& collection_name = "",
-                              const uint32_t& context_ids_length = 0,
-                              const uint32_t* context_ids = nullptr) const;
+    Option<bool> _do_filtering(filter_node_t* const root,
+                               filter_result_t& result,
+                               const std::string& collection_name = "",
+                               const uint32_t& context_ids_length = 0,
+                               uint32_t* const& context_ids = nullptr) const;
 
     void aproximate_numerical_match(num_tree_t* const num_tree,
                                     const NUM_COMPARATOR& comparator,
@@ -488,7 +488,9 @@ private:
 
     Option<bool> recursive_filter(filter_node_t* const root,
                                   filter_result_t& result,
-                                  const std::string& collection_name = "") const;
+                                  const std::string& collection_name = "",
+                                  const uint32_t& context_ids_length = 0,
+                                  uint32_t* const& context_ids = nullptr) const;
 
     Option<bool> adaptive_filter(filter_node_t* const filter_tree_root,
                                  filter_result_t& result,
@@ -689,17 +691,13 @@ public:
                                         filter_result_t& filter_result,
                                         const std::string& collection_name = "") const;
 
-    Option<bool> _rearranging_recursive_filter(filter_node_t* const filter_tree_root,
-                                              filter_result_t& result,
-                                              const std::string& collection_name = "") const;
-
-    Option<bool> _rearrange_filter_tree(filter_node_t* const root,
+    Option<bool> rearrange_filter_tree(filter_node_t* const root,
                                        uint32_t& filter_ids_length,
                                        const std::string& collection_name = "") const;
 
     Option<bool> _approximate_filter_ids(const filter& a_filter,
-                                        uint32_t& filter_ids_length,
-                                        const std::string& collection_name = "") const;
+                                         uint32_t& filter_ids_length,
+                                         const std::string& collection_name = "") const;
 
     Option<bool> do_reference_filtering_with_lock(filter_node_t* const filter_tree_root,
                                                   filter_result_t& filter_result,
diff --git a/include/num_tree.h b/include/num_tree.h
index 280f47dd..5406a109 100644
--- a/include/num_tree.h
+++ b/include/num_tree.h
@@ -34,7 +34,7 @@ public:
 
     void range_inclusive_contains(const int64_t& start, const int64_t& end,
                                   const uint32_t& context_ids_length,
-                                  const uint32_t*& context_ids,
+                                  uint32_t* const& context_ids,
                                   size_t& result_ids_len,
                                   uint32_t*& result_ids) const;
 
@@ -50,7 +50,7 @@ public:
 
     void contains(const NUM_COMPARATOR& comparator, const int64_t& value,
                   const uint32_t& context_ids_length,
-                  const uint32_t*& context_ids,
+                  uint32_t* const& context_ids,
                   size_t& result_ids_len,
                   uint32_t*& result_ids) const;
 };
\ No newline at end of file
diff --git a/src/field.cpp b/src/field.cpp
index 129c7512..c7297359 100644
--- a/src/field.cpp
+++ b/src/field.cpp
@@ -384,9 +384,7 @@ Option<bool> toFilter(const std::string expression,
 Option<bool> toParseTree(std::queue<std::string>& postfix, filter_node_t*& root,
                          const tsl::htrie_map<char, field>& search_schema,
                          const Store* store,
-                         const std::string& doc_id_prefix,
-                         int& and_operator_count,
-                         int& or_operator_count) {
+                         const std::string& doc_id_prefix) {
     std::stack<filter_node_t*> nodeStack;
     bool is_successful = true;
     std::string error_message;
@@ -413,7 +411,6 @@ Option<bool> toParseTree(std::queue<std::string>& postfix, filter_node_t*& root,
             auto operandA = nodeStack.top();
             nodeStack.pop();
 
-            expression == "&&" ? and_operator_count++ : or_operator_count++;
             filter_node = new filter_node_t(expression == "&&" ? AND : OR, operandA, operandB);
         } else {
             filter filter_exp;
@@ -502,22 +499,15 @@ Option<bool> filter::parse_filter_query(const std::string& filter_query,
         return toPostfix_op;
     }
 
-    int postfix_size = (int) postfix.size(), and_operator_count = 0, or_operator_count = 0;
     Option<bool> toParseTree_op = toParseTree(postfix,
                                               root,
                                               search_schema,
                                               store,
-                                              doc_id_prefix,
-                                              and_operator_count,
-                                              or_operator_count);
+                                              doc_id_prefix);
     if (!toParseTree_op.ok()) {
         return toParseTree_op;
     }
 
-    root->metrics = new filter_tree_metrics{static_cast<int>(postfix_size - (and_operator_count + or_operator_count)),
-                     and_operator_count,
-                     or_operator_count};
-
     return Option<bool>(true);
 }
 
diff --git a/src/index.cpp b/src/index.cpp
index 124cf567..73b0cd08 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1452,7 +1452,7 @@ void Index::search_candidates(const uint8_t & field_id, bool field_is_array,
 void Index::numeric_not_equals_filter(num_tree_t* const num_tree,
                                       const int64_t value,
                                       const uint32_t& context_ids_length,
-                                      const uint32_t* context_ids,
+                                      uint32_t* const& context_ids,
                                       size_t& ids_len,
                                       uint32_t*& ids) const {
     uint32_t* to_exclude_ids = nullptr;
@@ -1491,11 +1491,11 @@ bool Index::field_is_indexed(const std::string& field_name) const {
     geopoint_index.count(field_name) != 0;
 }
 
-Option<bool> Index::do_filtering(filter_node_t* const root,
-                                 filter_result_t& result,
-                                 const std::string& collection_name,
-                                 const uint32_t& context_ids_length,
-                                 const uint32_t* context_ids) const {
+Option<bool> Index::_do_filtering(filter_node_t* const root,
+                                  filter_result_t& result,
+                                  const std::string& collection_name,
+                                  const uint32_t& context_ids_length,
+                                  uint32_t* const& context_ids) const {
     // auto begin = std::chrono::high_resolution_clock::now();
     const filter a_filter = root->filter_exp;
 
@@ -1953,8 +1953,8 @@ void Index::aproximate_numerical_match(num_tree_t* const num_tree,
 }
 
 Option<bool> Index::_approximate_filter_ids(const filter& a_filter,
-                                           uint32_t& filter_ids_length,
-                                           const std::string& collection_name) const {
+                                            uint32_t& filter_ids_length,
+                                            const std::string& collection_name) const {
     if (!a_filter.referenced_collection_name.empty()) {
         auto& cm = CollectionManager::get_instance();
         auto collection = cm.get_collection(a_filter.referenced_collection_name);
@@ -2054,7 +2054,7 @@ Option<bool> Index::_approximate_filter_ids(const filter& a_filter,
     return Option(true);
 }
 
-Option<bool> Index::_rearrange_filter_tree(filter_node_t* const root,
+Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
                                           uint32_t& filter_ids_length,
                                           const std::string& collection_name) const {
     if (root == nullptr) {
@@ -2064,7 +2064,7 @@ Option<bool> Index::_rearrange_filter_tree(filter_node_t* const root,
     if (root->isOperator) {
         uint32_t l_filter_ids_length = 0;
         if (root->left != nullptr) {
-            auto rearrange_op = _rearrange_filter_tree(root->left, l_filter_ids_length, collection_name);
+            auto rearrange_op = rearrange_filter_tree(root->left, l_filter_ids_length, collection_name);
             if (!rearrange_op.ok()) {
                 return rearrange_op;
             }
@@ -2072,7 +2072,7 @@ Option<bool> Index::_rearrange_filter_tree(filter_node_t* const root,
 
         uint32_t r_filter_ids_length = 0;
         if (root->right != nullptr) {
-            auto rearrange_op = _rearrange_filter_tree(root->right, r_filter_ids_length, collection_name);
+            auto rearrange_op = rearrange_filter_tree(root->right, r_filter_ids_length, collection_name);
             if (!rearrange_op.ok()) {
                 return rearrange_op;
             }
@@ -2095,18 +2095,6 @@ Option<bool> Index::_rearrange_filter_tree(filter_node_t* const root,
     return Option(true);
 }
 
-Option<bool> Index::_rearranging_recursive_filter(filter_node_t* const filter_tree_root,
-                                                 filter_result_t& result,
-                                                 const std::string& collection_name) const {
-    uint32_t filter_ids_length = 0;
-    auto rearrange_op = _rearrange_filter_tree(filter_tree_root, filter_ids_length, collection_name);
-    if (!rearrange_op.ok()) {
-        return rearrange_op;
-    }
-
-    return recursive_filter(filter_tree_root, result, collection_name);
-}
-
 void copy_reference_ids(filter_result_t& from, filter_result_t& to) {
     if (to.count > 0 && !from.reference_filter_results.empty()) {
         for (const auto &item: from.reference_filter_results) {
@@ -2132,7 +2120,9 @@ void copy_reference_ids(filter_result_t& from, filter_result_t& to) {
 
 Option<bool> Index::recursive_filter(filter_node_t* const root,
                                      filter_result_t& result,
-                                     const std::string& collection_name) const {
+                                     const std::string& collection_name,
+                                     const uint32_t& context_ids_length,
+                                     uint32_t* const& context_ids) const {
     if (root == nullptr) {
         return Option(true);
     }
@@ -2140,7 +2130,7 @@ Option<bool> Index::recursive_filter(filter_node_t* const root,
     if (root->isOperator) {
         filter_result_t l_result;
         if (root->left != nullptr) {
-            auto filter_op = recursive_filter(root->left, l_result , collection_name);
+            auto filter_op = recursive_filter(root->left, l_result , collection_name, context_ids_length, context_ids);
             if (!filter_op.ok()) {
                 return filter_op;
             }
@@ -2148,7 +2138,7 @@ Option<bool> Index::recursive_filter(filter_node_t* const root,
 
         filter_result_t r_result;
         if (root->right != nullptr) {
-            auto filter_op = recursive_filter(root->right, r_result , collection_name);
+            auto filter_op = recursive_filter(root->right, r_result , collection_name, context_ids_length, context_ids);
             if (!filter_op.ok()) {
                 return filter_op;
             }
@@ -2173,7 +2163,7 @@ Option<bool> Index::recursive_filter(filter_node_t* const root,
         return Option(true);
     }
 
-    return do_filtering(root, result, collection_name);
+    return _do_filtering(root, result, collection_name, context_ids_length, context_ids);
 }
 
 Option<bool> Index::adaptive_filter(filter_node_t* const filter_tree_root,
@@ -2183,16 +2173,13 @@ Option<bool> Index::adaptive_filter(filter_node_t* const filter_tree_root,
         return Option(true);
     }
 
-    auto metrics = filter_tree_root->metrics;
-    if (metrics != nullptr &&
-    metrics->filter_exp_count > 2 &&
-    metrics->and_operator_count > 0 &&
-    // If there are more || in the filter tree than &&, we'll not gain much by rearranging the filter tree.
-    ((float) metrics->or_operator_count / (float) metrics->and_operator_count < 0.5)) {
-        return _rearranging_recursive_filter(filter_tree_root, result, collection_name);
-    } else {
-        return recursive_filter(filter_tree_root, result, collection_name);
+    uint32_t filter_ids_length = 0;
+    auto op = rearrange_filter_tree(filter_tree_root, filter_ids_length, collection_name);
+    if (!op.ok()) {
+        return op;
     }
+
+    return recursive_filter(filter_tree_root, result, collection_name);
 }
 
 Option<bool> Index::do_filtering_with_lock(filter_node_t* const filter_tree_root,
@@ -2252,7 +2239,7 @@ Option<bool> Index::get_approximate_reference_filter_ids_with_lock(filter_node_t
                                                                    uint32_t& filter_ids_length) const {
     std::shared_lock lock(mutex);
 
-    return _rearrange_filter_tree(filter_tree_root, filter_ids_length);
+    return rearrange_filter_tree(filter_tree_root, filter_ids_length);
 }
 
 Option<bool> Index::run_search(search_args* search_params, const std::string& collection_name) {
diff --git a/src/num_tree.cpp b/src/num_tree.cpp
index 5a1b95d3..1bcdbc9f 100644
--- a/src/num_tree.cpp
+++ b/src/num_tree.cpp
@@ -75,7 +75,7 @@ bool num_tree_t::range_inclusive_contains(const int64_t& start, const int64_t& e
 
 void num_tree_t::range_inclusive_contains(const int64_t& start, const int64_t& end,
                                           const uint32_t& context_ids_length,
-                                          const uint32_t*& context_ids,
+                                          uint32_t* const& context_ids,
                                           size_t& result_ids_len,
                                           uint32_t*& result_ids) const {
     if (int64map.empty()) {
@@ -251,7 +251,7 @@ void num_tree_t::remove(uint64_t value, uint32_t id) {
 
 void num_tree_t::contains(const NUM_COMPARATOR& comparator, const int64_t& value,
                           const uint32_t& context_ids_length,
-                          const uint32_t*& context_ids,
+                          uint32_t* const& context_ids,
                           size_t& result_ids_len,
                           uint32_t*& result_ids) const {
     if (int64map.empty()) {
diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp
index b34b9973..2e9369cf 100644
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@@ -2076,8 +2076,8 @@ TEST_F(CollectionSpecificMoreTest, RearrangingFilterTree) {
     ASSERT_TRUE(root->left == nullptr);
     ASSERT_TRUE(root->right == nullptr);
 
-    filter_result_t result;
-    coll->_get_index()->_rearranging_recursive_filter(filter_tree_root, result);
+    uint32_t count = 0;
+    coll->_get_index()->rearrange_filter_tree(filter_tree_root, count);
 
     //                 &&
     //               /    \
@@ -2199,7 +2199,7 @@ TEST_F(CollectionSpecificMoreTest, ApproxFilterMatchCount) {
                                                         coll->get_schema(), store, doc_id_prefix, filter_tree_root);
     ASSERT_TRUE(filter_op.ok());
 
-    coll->_get_index()->_rearrange_filter_tree(filter_tree_root, approx_count);
+    coll->_get_index()->rearrange_filter_tree(filter_tree_root, approx_count);
     ASSERT_EQ(approx_count, 3);
 
     delete filter_tree_root;

From f3706f737baa5a351d3064f80f5a2e8094e1023d Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 9 Mar 2023 14:26:18 +0530
Subject: [PATCH 44/51] Refactor `std::unique_ptr<filter_node_t>` order.

---
 src/collection.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/collection.cpp b/src/collection.cpp
index 9d1c0635..d35e2e10 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -2509,12 +2509,12 @@ Option<bool> Collection::get_filter_ids(const std::string& filter_query, filter_
     filter_node_t* filter_tree_root = nullptr;
     Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
                                                         store, doc_id_prefix, filter_tree_root);
+    std::unique_ptr<filter_node_t> filter_tree_root_guard(filter_tree_root);
+
     if(!filter_op.ok()) {
         return filter_op;
     }
 
-    std::unique_ptr<filter_node_t> filter_tree_root_guard(filter_tree_root);
-
     return index->do_filtering_with_lock(filter_tree_root, filter_result, name);
 }
 
@@ -2544,12 +2544,12 @@ Option<bool> Collection::get_approximate_reference_filter_ids(const std::string&
     filter_node_t* filter_tree_root = nullptr;
     Option<bool> parse_op = filter::parse_filter_query(filter_query, search_schema,
                                                        store, doc_id_prefix, filter_tree_root);
+    std::unique_ptr<filter_node_t> filter_tree_root_guard(filter_tree_root);
+
     if(!parse_op.ok()) {
         return parse_op;
     }
 
-    std::unique_ptr<filter_node_t> filter_tree_root_guard(filter_tree_root);
-
     return index->get_approximate_reference_filter_ids_with_lock(filter_tree_root, filter_ids_length);
 }
 
@@ -2567,12 +2567,12 @@ Option<bool> Collection::get_reference_filter_ids(const std::string & filter_que
     filter_node_t* filter_tree_root = nullptr;
     Option<bool> parse_op = filter::parse_filter_query(filter_query, search_schema,
                                                        store, doc_id_prefix, filter_tree_root);
+    std::unique_ptr<filter_node_t> filter_tree_root_guard(filter_tree_root);
+
     if(!parse_op.ok()) {
         return parse_op;
     }
 
-    std::unique_ptr<filter_node_t> filter_tree_root_guard(filter_tree_root);
-
     // Reference helper field has the sequence id of other collection's documents.
     auto field_name = reference_field_op.get() + REFERENCE_HELPER_FIELD_SUFFIX;
     return index->do_reference_filtering_with_lock(filter_tree_root, filter_result, name, field_name);

From cd2f5be875c67e1a4242ff4e65c05a4a3f690f90 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 9 Mar 2023 15:06:11 +0530
Subject: [PATCH 45/51] Remove `Index::adaptive_filter`.

---
 include/index.h |  4 ----
 src/index.cpp   | 31 ++++++++++---------------------
 2 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/include/index.h b/include/index.h
index 62e75180..43ec2f28 100644
--- a/include/index.h
+++ b/include/index.h
@@ -492,10 +492,6 @@ private:
                                   const uint32_t& context_ids_length = 0,
                                   uint32_t* const& context_ids = nullptr) const;
 
-    Option<bool> adaptive_filter(filter_node_t* const filter_tree_root,
-                                 filter_result_t& result,
-                                 const std::string& collection_name = "") const;
-
     void insert_doc(const int64_t score, art_tree *t, uint32_t seq_id,
                     const std::unordered_map<std::string, std::vector<uint32_t>> &token_to_offsets) const;
 
diff --git a/src/index.cpp b/src/index.cpp
index 73b0cd08..d4f4f9b3 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -2166,28 +2166,12 @@ Option<bool> Index::recursive_filter(filter_node_t* const root,
     return _do_filtering(root, result, collection_name, context_ids_length, context_ids);
 }
 
-Option<bool> Index::adaptive_filter(filter_node_t* const filter_tree_root,
-                                    filter_result_t& result,
-                                    const std::string& collection_name) const {
-    if (filter_tree_root == nullptr) {
-        return Option(true);
-    }
-
-    uint32_t filter_ids_length = 0;
-    auto op = rearrange_filter_tree(filter_tree_root, filter_ids_length, collection_name);
-    if (!op.ok()) {
-        return op;
-    }
-
-    return recursive_filter(filter_tree_root, result, collection_name);
-}
-
 Option<bool> Index::do_filtering_with_lock(filter_node_t* const filter_tree_root,
                                            filter_result_t& filter_result,
                                            const std::string& collection_name) const {
     std::shared_lock lock(mutex);
 
-    auto filter_op = adaptive_filter(filter_tree_root, filter_result, collection_name);
+    auto filter_op = recursive_filter(filter_tree_root, filter_result, collection_name);
     if (!filter_op.ok()) {
         return filter_op;
     }
@@ -2202,7 +2186,7 @@ Option<bool> Index::do_reference_filtering_with_lock(filter_node_t* const filter
     std::shared_lock lock(mutex);
 
     filter_result_t reference_filter_result;
-    auto filter_op = adaptive_filter(filter_tree_root, reference_filter_result);
+    auto filter_op = recursive_filter(filter_tree_root, reference_filter_result);
     if (!filter_op.ok()) {
         return filter_op;
     }
@@ -2730,9 +2714,14 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
                    const std::string& collection_name) const {
     std::shared_lock lock(mutex);
 
+    uint32_t filter_ids_length = 0;
+    auto rearrange_op = rearrange_filter_tree(filter_tree_root, filter_ids_length, collection_name);
+    if (!rearrange_op.ok()) {
+        return rearrange_op;
+    }
+
     filter_result_t filter_result;
-    // process the filters
-    auto filter_op = adaptive_filter(filter_tree_root, filter_result, collection_name);
+    auto filter_op = recursive_filter(filter_tree_root, filter_result, collection_name);
     if (!filter_op.ok()) {
         return filter_op;
     }
@@ -4840,7 +4829,7 @@ void Index::populate_sort_mapping(int* sort_order, std::vector<size_t>& geopoint
         } else if (sort_fields_std[i].name == sort_field_const::eval) {
             field_values[i] = &eval_sentinel_value;
             filter_result_t result;
-            adaptive_filter(sort_fields_std[i].eval.filter_tree_root, result);
+            recursive_filter(sort_fields_std[i].eval.filter_tree_root, result);
             sort_fields_std[i].eval.ids = result.docs;
             sort_fields_std[i].eval.size = result.count;
             result.docs = nullptr;

From 2a1feae0ee2dca34071bc94c0eff35f4bdca1d80 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 9 Mar 2023 19:53:50 +0530
Subject: [PATCH 46/51] Add comments.

---
 include/collection.h |  1 +
 include/field.h      |  6 ------
 include/index.h      | 12 ++++++++++--
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/include/collection.h b/include/collection.h
index 4e08d4da..dc8f41db 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -457,6 +457,7 @@ public:
 
     Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;
 
+    /// Get approximate count of docs matching a reference filter on foo collection when $foo(...) filter is encountered.
     Option<bool> get_approximate_reference_filter_ids(const std::string& filter_query,
                                                       uint32_t& filter_ids_length) const;
 
diff --git a/include/field.h b/include/field.h
index dd0033eb..a4ac81b8 100644
--- a/include/field.h
+++ b/include/field.h
@@ -594,12 +594,6 @@ struct filter {
                                            filter_node_t*& root);
 };
 
-struct filter_tree_metrics {
-    int filter_exp_count;
-    int and_operator_count;
-    int or_operator_count;
-};
-
 struct filter_node_t {
     filter filter_exp;
     FILTER_OPERATOR filter_operator;
diff --git a/include/index.h b/include/index.h
index 43ec2f28..9f170a4d 100644
--- a/include/index.h
+++ b/include/index.h
@@ -486,8 +486,15 @@ private:
                                     const int64_t& range_end_value,
                                     uint32_t& filter_ids_length) const;
 
-    Option<bool> recursive_filter(filter_node_t* const root,
-                                  filter_result_t& result,
+    /// Traverses through filter tree to get the filter_result.
+    ///
+    /// \param filter_tree_root
+    /// \param filter_result
+    /// \param collection_name Name of the collection to which current index belongs. Used to find the reference field in other collection.
+    /// \param context_ids_length Number of docs matching the search query.
+    /// \param context_ids Array of doc ids matching the search query.
+    Option<bool> recursive_filter(filter_node_t* const filter_tree_root,
+                                  filter_result_t& filter_result,
                                   const std::string& collection_name = "",
                                   const uint32_t& context_ids_length = 0,
                                   uint32_t* const& context_ids = nullptr) const;
@@ -700,6 +707,7 @@ public:
                                                   const std::string& collection_name,
                                                   const std::string& reference_helper_field_name) const;
 
+    /// Get approximate count of docs matching a reference filter on foo collection when $foo(...) filter is encountered.
     Option<bool> get_approximate_reference_filter_ids_with_lock(filter_node_t* const filter_tree_root,
                                                                 uint32_t& filter_ids_length) const;
 

From 63f8d33b5fbe703d3728a876cff6bb6a0220c7ea Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 9 Mar 2023 20:41:58 +0530
Subject: [PATCH 47/51] Add comments.

---
 include/index.h | 20 +++++++++++++-------
 src/index.cpp   | 20 ++++++++++----------
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/include/index.h b/include/index.h
index 9f170a4d..74e7e0ee 100644
--- a/include/index.h
+++ b/include/index.h
@@ -474,11 +474,11 @@ private:
 
     bool field_is_indexed(const std::string& field_name) const;
 
-    Option<bool> _do_filtering(filter_node_t* const root,
-                               filter_result_t& result,
-                               const std::string& collection_name = "",
-                               const uint32_t& context_ids_length = 0,
-                               uint32_t* const& context_ids = nullptr) const;
+    Option<bool> do_filtering(filter_node_t* const root,
+                              filter_result_t& result,
+                              const std::string& collection_name = "",
+                              const uint32_t& context_ids_length = 0,
+                              uint32_t* const& context_ids = nullptr) const;
 
     void aproximate_numerical_match(num_tree_t* const num_tree,
                                     const NUM_COMPARATOR& comparator,
@@ -694,8 +694,14 @@ public:
                                         filter_result_t& filter_result,
                                         const std::string& collection_name = "") const;
 
-    Option<bool> rearrange_filter_tree(filter_node_t* const root,
-                                       uint32_t& filter_ids_length,
+    /// Traverses through filter tree and gets an approximate doc count for each filter. Also arranges the children of
+    /// each operator in ascending order based on their approx doc count.
+    ///
+    /// \param filter_tree_root
+    /// \param approx_filter_ids_length Approximate count of docs that would match the whole filter_by clause.
+    /// \param collection_name Name of the collection to which current index belongs. Used to find the reference field in other collection.
+    Option<bool> rearrange_filter_tree(filter_node_t* const filter_tree_root,
+                                       uint32_t& approx_filter_ids_length,
                                        const std::string& collection_name = "") const;
 
     Option<bool> _approximate_filter_ids(const filter& a_filter,
diff --git a/src/index.cpp b/src/index.cpp
index d4f4f9b3..bb5ca2c4 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1491,11 +1491,11 @@ bool Index::field_is_indexed(const std::string& field_name) const {
     geopoint_index.count(field_name) != 0;
 }
 
-Option<bool> Index::_do_filtering(filter_node_t* const root,
-                                  filter_result_t& result,
-                                  const std::string& collection_name,
-                                  const uint32_t& context_ids_length,
-                                  uint32_t* const& context_ids) const {
+Option<bool> Index::do_filtering(filter_node_t* const root,
+                                 filter_result_t& result,
+                                 const std::string& collection_name,
+                                 const uint32_t& context_ids_length,
+                                 uint32_t* const& context_ids) const {
     // auto begin = std::chrono::high_resolution_clock::now();
     const filter a_filter = root->filter_exp;
 
@@ -2055,7 +2055,7 @@ Option<bool> Index::_approximate_filter_ids(const filter& a_filter,
 }
 
 Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
-                                          uint32_t& filter_ids_length,
+                                          uint32_t& approx_filter_ids_length,
                                           const std::string& collection_name) const {
     if (root == nullptr) {
         return Option(true);
@@ -2079,9 +2079,9 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
         }
 
         if (root->filter_operator == AND) {
-            filter_ids_length = std::min(l_filter_ids_length, r_filter_ids_length);
+            approx_filter_ids_length = std::min(l_filter_ids_length, r_filter_ids_length);
         } else {
-            filter_ids_length = l_filter_ids_length + r_filter_ids_length;
+            approx_filter_ids_length = l_filter_ids_length + r_filter_ids_length;
         }
 
         if (l_filter_ids_length > r_filter_ids_length) {
@@ -2091,7 +2091,7 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
         return Option(true);
     }
 
-    _approximate_filter_ids(root->filter_exp, filter_ids_length, collection_name);
+    _approximate_filter_ids(root->filter_exp, approx_filter_ids_length, collection_name);
     return Option(true);
 }
 
@@ -2163,7 +2163,7 @@ Option<bool> Index::recursive_filter(filter_node_t* const root,
         return Option(true);
     }
 
-    return _do_filtering(root, result, collection_name, context_ids_length, context_ids);
+    return do_filtering(root, result, collection_name, context_ids_length, context_ids);
 }
 
 Option<bool> Index::do_filtering_with_lock(filter_node_t* const filter_tree_root,

From 391d693ffa45cf2aedbe84d58c5cbc711a2b8332 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 10 Mar 2023 11:56:08 +0530
Subject: [PATCH 48/51] Add `and_filter_result` function.

---
 src/index.cpp                 |  75 ++++++++++--
 test/collection_join_test.cpp | 208 ++++++++++++++++++++++++++++++++++
 2 files changed, 275 insertions(+), 8 deletions(-)

diff --git a/src/index.cpp b/src/index.cpp
index bb5ca2c4..03bc00ab 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -2095,6 +2095,66 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
     return Option(true);
 }
 
+void and_filter_result(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) {
+    auto lenA = a.count, lenB = b.count;
+    if (lenA == 0 || lenB == 0) {
+        return;
+    }
+
+    result.docs = new uint32_t[std::min(lenA, lenB)];
+
+    auto A = a.docs, B = b.docs, out = result.docs;
+    const uint32_t *endA = A + lenA;
+    const uint32_t *endB = B + lenB;
+
+    for (auto const& item: a.reference_filter_results) {
+        result.reference_filter_results[item.first];
+    }
+    for (auto const& item: b.reference_filter_results) {
+        result.reference_filter_results[item.first];
+    }
+    for (auto& item: result.reference_filter_results) {
+        item.second = new reference_filter_result_t[std::min(lenA, lenB)];
+    }
+
+    while (true) {
+        while (*A < *B) {
+            SKIP_FIRST_COMPARE:
+            if (++A == endA) {
+                result.count = out - result.docs;
+                return;
+            }
+        }
+        while (*A > *B) {
+            if (++B == endB) {
+                result.count = out - result.docs;
+                return;
+            }
+        }
+        if (*A == *B) {
+            *out = *A;
+
+            for (auto const& item: a.reference_filter_results) {
+                result.reference_filter_results[item.first][out - result.docs] = item.second[A - a.docs];
+                item.second[A - a.docs].docs = nullptr;
+            }
+            for (auto const& item: b.reference_filter_results) {
+                result.reference_filter_results[item.first][out - result.docs] = item.second[B - b.docs];
+                item.second[B - b.docs].docs = nullptr;
+            }
+
+            out++;
+
+            if (++A == endA || ++B == endB) {
+                result.count = out - result.docs;
+                return;
+            }
+        } else {
+            goto SKIP_FIRST_COMPARE;
+        }
+    }
+}
+
 void copy_reference_ids(filter_result_t& from, filter_result_t& to) {
     if (to.count > 0 && !from.reference_filter_results.empty()) {
         for (const auto &item: from.reference_filter_results) {
@@ -2144,21 +2204,20 @@ Option<bool> Index::recursive_filter(filter_node_t* const root,
             }
         }
 
-        uint32_t* filtered_results = nullptr;
         if (root->filter_operator == AND) {
-            result.count = ArrayUtils::and_scalar(
-                    l_result.docs, l_result.count, r_result.docs,
-                    r_result.count, &filtered_results);
+            and_filter_result(l_result, r_result, result);
         } else {
+            uint32_t* filtered_results = nullptr;
             result.count = ArrayUtils::or_scalar(
                     l_result.docs, l_result.count, r_result.docs,
                     r_result.count, &filtered_results);
+
+            result.docs = filtered_results;
+            if (!l_result.reference_filter_results.empty() || !r_result.reference_filter_results.empty()) {
+                copy_reference_ids(!l_result.reference_filter_results.empty() ? l_result : r_result, result);
+            }
         }
 
-        result.docs = filtered_results;
-        if (!l_result.reference_filter_results.empty() || !r_result.reference_filter_results.empty()) {
-            copy_reference_ids(!l_result.reference_filter_results.empty() ? l_result : r_result, result);
-        }
 
         return Option(true);
     }
diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index b25439e6..6df9d397 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -551,6 +551,214 @@ TEST_F(CollectionJoinTest, FilterByReference_MultipleMatch) {
     collectionManager.drop_collection("Links");
 }
 
+TEST_F(CollectionJoinTest, FilterByNReferences) {
+    auto schema_json =
+            R"({
+                "name": "Users",
+                "fields": [
+                    {"name": "user_id", "type": "string"},
+                    {"name": "user_name", "type": "string"}
+                ]
+            })"_json;
+    std::vector<nlohmann::json> documents = {
+            R"({
+                "user_id": "user_a",
+                "user_name": "Roshan"
+            })"_json,
+            R"({
+                "user_id": "user_b",
+                "user_name": "Ruby"
+            })"_json,
+            R"({
+                "user_id": "user_c",
+                "user_name": "Joe"
+            })"_json,
+            R"({
+                "user_id": "user_d",
+                "user_name": "Aby"
+            })"_json
+    };
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    schema_json =
+            R"({
+                "name": "Repos",
+                "fields": [
+                    {"name": "repo_id", "type": "string"},
+                    {"name": "repo_content", "type": "string"},
+                    {"name": "repo_stars", "type": "int32"},
+                    {"name": "repo_is_private", "type": "bool"}
+                ]
+            })"_json;
+    documents = {
+            R"({
+                "repo_id": "repo_a",
+                "repo_content": "body1",
+                "repo_stars": 431,
+                "repo_is_private": true
+            })"_json,
+            R"({
+                "repo_id": "repo_b",
+                "repo_content": "body2",
+                "repo_stars": 4562,
+                "repo_is_private": false
+            })"_json,
+            R"({
+                "repo_id": "repo_c",
+                "repo_content": "body3",
+                "repo_stars": 945,
+                "repo_is_private": false
+            })"_json
+    };
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    schema_json =
+            R"({
+                "name": "Links",
+                "fields": [
+                    {"name": "repo_id", "type": "string", "reference": "Repos.repo_id"},
+                    {"name": "user_id", "type": "string", "reference": "Users.user_id"}
+                ]
+            })"_json;
+    documents = {
+            R"({
+                "repo_id": "repo_a",
+                "user_id": "user_b"
+            })"_json,
+            R"({
+                "repo_id": "repo_a",
+                "user_id": "user_c"
+            })"_json,
+            R"({
+                "repo_id": "repo_b",
+                "user_id": "user_a"
+            })"_json,
+            R"({
+                "repo_id": "repo_b",
+                "user_id": "user_b"
+            })"_json,
+            R"({
+                "repo_id": "repo_b",
+                "user_id": "user_d"
+            })"_json,
+            R"({
+                "repo_id": "repo_c",
+                "user_id": "user_a"
+            })"_json,
+            R"({
+                "repo_id": "repo_c",
+                "user_id": "user_b"
+            })"_json,
+            R"({
+                "repo_id": "repo_c",
+                "user_id": "user_c"
+            })"_json,
+            R"({
+                "repo_id": "repo_c",
+                "user_id": "user_d"
+            })"_json
+    };
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    schema_json =
+            R"({
+                "name": "Organizations",
+                "fields": [
+                    {"name": "org_id", "type": "string"},
+                    {"name": "org_name", "type": "string"}
+                ]
+            })"_json;
+    documents = {
+            R"({
+                "org_id": "org_a",
+                "org_name": "Typesense"
+            })"_json
+    };
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    schema_json =
+            R"({
+                "name": "Participants",
+                "fields": [
+                    {"name": "user_id", "type": "string", "reference": "Users.user_id"},
+                    {"name": "org_id", "type": "string", "reference": "Organizations.org_id"}
+                ]
+            })"_json;
+    documents = {
+            R"({
+                "user_id": "user_a",
+                "org_id": "org_a"
+            })"_json,
+            R"({
+                "user_id": "user_b",
+                "org_id": "org_a"
+            })"_json,
+            R"({
+                "user_id": "user_d",
+                "org_id": "org_a"
+            })"_json,
+    };
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    auto coll = collectionManager.get_collection_unsafe("Users");
+
+    // Search for users within an organization with access to a particular repo.
+    auto result = coll->search("R", {"user_name"}, "$Participants(org_id:=org_a) && $Links(repo_id:=repo_b)", {}, {}, {0},
+                               10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD).get();
+
+    ASSERT_EQ(2, result["found"].get<size_t>());
+    ASSERT_EQ(2, result["hits"].size());
+    ASSERT_EQ("user_b", result["hits"][0]["document"]["user_id"].get<std::string>());
+    ASSERT_EQ("user_a", result["hits"][1]["document"]["user_id"].get<std::string>());
+
+    collectionManager.drop_collection("Users");
+    collectionManager.drop_collection("Repos");
+    collectionManager.drop_collection("Links");
+}
+
 TEST_F(CollectionJoinTest, IncludeFieldsByReference_SingleMatch) {
     auto schema_json =
             R"({

From c4c59169f08c84a69e27730b03dce5ab505451f4 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 10 Mar 2023 13:45:20 +0530
Subject: [PATCH 49/51] Refactor `and_filter_result`.

---
 src/index.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/index.cpp b/src/index.cpp
index 03bc00ab..3f6809b7 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -2108,13 +2108,14 @@ void and_filter_result(const filter_result_t& a, const filter_result_t& b, filte
     const uint32_t *endB = B + lenB;
 
     for (auto const& item: a.reference_filter_results) {
-        result.reference_filter_results[item.first];
+        if (result.reference_filter_results.count(item.first) == 0) {
+            result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
+        }
     }
     for (auto const& item: b.reference_filter_results) {
-        result.reference_filter_results[item.first];
-    }
-    for (auto& item: result.reference_filter_results) {
-        item.second = new reference_filter_result_t[std::min(lenA, lenB)];
+        if (result.reference_filter_results.count(item.first) == 0) {
+            result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
+        }
     }
 
     while (true) {

From 3be000609a6b240aa09476b02eb9562395ccab0e Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 10 Mar 2023 16:21:14 +0530
Subject: [PATCH 50/51] Add tests for `and_filter_result`.

---
 include/field.h               | 61 +++++++++++++++++++++++++
 src/index.cpp                 | 63 +-------------------------
 test/collection_join_test.cpp | 83 +++++++++++++++++++++++++++++++++++
 3 files changed, 145 insertions(+), 62 deletions(-)

diff --git a/include/field.h b/include/field.h
index a4ac81b8..87a17702 100644
--- a/include/field.h
+++ b/include/field.h
@@ -660,6 +660,67 @@ struct filter_result_t {
             delete[] item.second;
         }
     }
+
+    static void and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) {
+        auto lenA = a.count, lenB = b.count;
+        if (lenA == 0 || lenB == 0) {
+            return;
+        }
+
+        result.docs = new uint32_t[std::min(lenA, lenB)];
+
+        auto A = a.docs, B = b.docs, out = result.docs;
+        const uint32_t *endA = A + lenA;
+        const uint32_t *endB = B + lenB;
+
+        for (auto const& item: a.reference_filter_results) {
+            if (result.reference_filter_results.count(item.first) == 0) {
+                result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
+            }
+        }
+        for (auto const& item: b.reference_filter_results) {
+            if (result.reference_filter_results.count(item.first) == 0) {
+                result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
+            }
+        }
+
+        while (true) {
+            while (*A < *B) {
+                SKIP_FIRST_COMPARE:
+                if (++A == endA) {
+                    result.count = out - result.docs;
+                    return;
+                }
+            }
+            while (*A > *B) {
+                if (++B == endB) {
+                    result.count = out - result.docs;
+                    return;
+                }
+            }
+            if (*A == *B) {
+                *out = *A;
+
+                for (auto const& item: a.reference_filter_results) {
+                    result.reference_filter_results[item.first][out - result.docs] = item.second[A - a.docs];
+                    item.second[A - a.docs].docs = nullptr;
+                }
+                for (auto const& item: b.reference_filter_results) {
+                    result.reference_filter_results[item.first][out - result.docs] = item.second[B - b.docs];
+                    item.second[B - b.docs].docs = nullptr;
+                }
+
+                out++;
+
+                if (++A == endA || ++B == endB) {
+                    result.count = out - result.docs;
+                    return;
+                }
+            } else {
+                goto SKIP_FIRST_COMPARE;
+            }
+        }
+    }
 };
 
 namespace sort_field_const {
diff --git a/src/index.cpp b/src/index.cpp
index 3f6809b7..0e09cc8b 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -2095,67 +2095,6 @@ Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
     return Option(true);
 }
 
-void and_filter_result(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) {
-    auto lenA = a.count, lenB = b.count;
-    if (lenA == 0 || lenB == 0) {
-        return;
-    }
-
-    result.docs = new uint32_t[std::min(lenA, lenB)];
-
-    auto A = a.docs, B = b.docs, out = result.docs;
-    const uint32_t *endA = A + lenA;
-    const uint32_t *endB = B + lenB;
-
-    for (auto const& item: a.reference_filter_results) {
-        if (result.reference_filter_results.count(item.first) == 0) {
-            result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
-        }
-    }
-    for (auto const& item: b.reference_filter_results) {
-        if (result.reference_filter_results.count(item.first) == 0) {
-            result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
-        }
-    }
-
-    while (true) {
-        while (*A < *B) {
-            SKIP_FIRST_COMPARE:
-            if (++A == endA) {
-                result.count = out - result.docs;
-                return;
-            }
-        }
-        while (*A > *B) {
-            if (++B == endB) {
-                result.count = out - result.docs;
-                return;
-            }
-        }
-        if (*A == *B) {
-            *out = *A;
-
-            for (auto const& item: a.reference_filter_results) {
-                result.reference_filter_results[item.first][out - result.docs] = item.second[A - a.docs];
-                item.second[A - a.docs].docs = nullptr;
-            }
-            for (auto const& item: b.reference_filter_results) {
-                result.reference_filter_results[item.first][out - result.docs] = item.second[B - b.docs];
-                item.second[B - b.docs].docs = nullptr;
-            }
-
-            out++;
-
-            if (++A == endA || ++B == endB) {
-                result.count = out - result.docs;
-                return;
-            }
-        } else {
-            goto SKIP_FIRST_COMPARE;
-        }
-    }
-}
-
 void copy_reference_ids(filter_result_t& from, filter_result_t& to) {
     if (to.count > 0 && !from.reference_filter_results.empty()) {
         for (const auto &item: from.reference_filter_results) {
@@ -2206,7 +2145,7 @@ Option<bool> Index::recursive_filter(filter_node_t* const root,
         }
 
         if (root->filter_operator == AND) {
-            and_filter_result(l_result, r_result, result);
+            filter_result_t::and_filter_results(l_result, r_result, result);
         } else {
             uint32_t* filtered_results = nullptr;
             result.count = ArrayUtils::or_scalar(
diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index 6df9d397..23c4b022 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -551,6 +551,89 @@ TEST_F(CollectionJoinTest, FilterByReference_MultipleMatch) {
     collectionManager.drop_collection("Links");
 }
 
+TEST_F(CollectionJoinTest, AndFilterResults_NoReference) {
+    // Plain id lists only: neither operand carries reference results.
+    filter_result_t a;
+    a.count = 9;
+    a.docs = new uint32_t[a.count];
+    for (size_t id = 0; id < a.count; id++) {
+        a.docs[id] = id;
+    }
+
+    // `b` holds the multiples of 3 that are below `limit`, starting from 3.
+    const uint32_t limit = 10;
+    filter_result_t b;
+    b.docs = new uint32_t[limit];
+    b.count = 0;
+    for (uint32_t multiple = 3; multiple < limit; multiple += 3) {
+        b.docs[b.count++] = multiple;
+    }
+
+    // a.docs: [0..8] , b.docs: [3, 6, 9]
+    filter_result_t result;
+    filter_result_t::and_filter_results(a, b, result);
+
+    ASSERT_EQ(2, result.count);
+    ASSERT_EQ(0, result.reference_filter_results.size());
+
+    const std::vector<uint32_t> expected_docs = {3, 6};
+
+    for(size_t i = 0; i < result.count; i++) {
+        ASSERT_EQ(expected_docs[i], result.docs[i]);
+    }
+}
+
+TEST_F(CollectionJoinTest, AndFilterResults_WithReferences) {
+    // Every id of `a` references the single "foo" document `10 - id`.
+    filter_result_t a;
+    a.count = 9;
+    a.docs = new uint32_t[a.count];
+    auto* foo_references = new reference_filter_result_t[a.count];
+    a.reference_filter_results["foo"] = foo_references;
+    for (size_t id = 0; id < a.count; id++) {
+        a.docs[id] = id;
+        foo_references[id].count = 1;
+        foo_references[id].docs = new uint32_t[1];
+        foo_references[id].docs[0] = 10 - id;
+    }
+
+    // `b` holds the multiples of 3 below `limit`; each references the single "bar" document `2 * id`.
+    const uint32_t limit = 10;
+    filter_result_t b;
+    b.docs = new uint32_t[limit];
+    b.count = 0;
+    auto* bar_references = new reference_filter_result_t[limit];
+    b.reference_filter_results["bar"] = bar_references;
+    for (uint32_t multiple = 3; multiple < limit; multiple += 3) {
+        auto& reference = bar_references[b.count];
+        reference.count = 1;
+        reference.docs = new uint32_t[1];
+        reference.docs[0] = 2 * multiple;
+        b.docs[b.count++] = multiple;
+    }
+
+    // a.docs: [0..8] , b.docs: [3, 6, 9]
+    filter_result_t result;
+    filter_result_t::and_filter_results(a, b, result);
+
+    ASSERT_EQ(2, result.count);
+    ASSERT_EQ(2, result.reference_filter_results.size());
+    ASSERT_EQ(1, result.reference_filter_results.count("foo"));
+    ASSERT_EQ(1, result.reference_filter_results.count("bar"));
+
+    const std::vector<uint32_t> docs = {3, 6}, foo_reference = {7, 4}, bar_reference = {6, 12};
+    for(size_t i = 0; i < result.count; i++) {
+        const auto& foo = result.reference_filter_results["foo"][i];
+        const auto& bar = result.reference_filter_results["bar"][i];
+
+        ASSERT_EQ(docs[i], result.docs[i]);
+        ASSERT_EQ(1, foo.count);
+        ASSERT_EQ(foo_reference[i], foo.docs[0]);
+        ASSERT_EQ(1, bar.count);
+        ASSERT_EQ(bar_reference[i], bar.docs[0]);
+    }
+}
+
 TEST_F(CollectionJoinTest, FilterByNReferences) {
     auto schema_json =
             R"({

From f71888703168da27a7c168ad0db4430d983814c8 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Fri, 10 Mar 2023 18:14:44 +0530
Subject: [PATCH 51/51] Refactor `and_filter_result`.

---
 include/field.h | 61 ++--------------------------------------------
 src/field.cpp   | 65 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/index.cpp   |  1 -
 3 files changed, 67 insertions(+), 60 deletions(-)

diff --git a/include/field.h b/include/field.h
index 87a17702..b7865a29 100644
--- a/include/field.h
+++ b/include/field.h
@@ -661,66 +661,9 @@ struct filter_result_t {
         }
     }
 
-    static void and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) {
-        auto lenA = a.count, lenB = b.count;
-        if (lenA == 0 || lenB == 0) {
-            return;
-        }
+    static void and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result); // Writes the doc ids common to `a` and `b` into `result`; defined in src/field.cpp.
 
-        result.docs = new uint32_t[std::min(lenA, lenB)];
-
-        auto A = a.docs, B = b.docs, out = result.docs;
-        const uint32_t *endA = A + lenA;
-        const uint32_t *endB = B + lenB;
-
-        for (auto const& item: a.reference_filter_results) {
-            if (result.reference_filter_results.count(item.first) == 0) {
-                result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
-            }
-        }
-        for (auto const& item: b.reference_filter_results) {
-            if (result.reference_filter_results.count(item.first) == 0) {
-                result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
-            }
-        }
-
-        while (true) {
-            while (*A < *B) {
-                SKIP_FIRST_COMPARE:
-                if (++A == endA) {
-                    result.count = out - result.docs;
-                    return;
-                }
-            }
-            while (*A > *B) {
-                if (++B == endB) {
-                    result.count = out - result.docs;
-                    return;
-                }
-            }
-            if (*A == *B) {
-                *out = *A;
-
-                for (auto const& item: a.reference_filter_results) {
-                    result.reference_filter_results[item.first][out - result.docs] = item.second[A - a.docs];
-                    item.second[A - a.docs].docs = nullptr;
-                }
-                for (auto const& item: b.reference_filter_results) {
-                    result.reference_filter_results[item.first][out - result.docs] = item.second[B - b.docs];
-                    item.second[B - b.docs].docs = nullptr;
-                }
-
-                out++;
-
-                if (++A == endA || ++B == endB) {
-                    result.count = out - result.docs;
-                    return;
-                }
-            } else {
-                goto SKIP_FIRST_COMPARE;
-            }
-        }
-    }
+    static void or_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result); // NOTE(review): declaration only; this patch adds no definition, so confirm one exists before calling it.
 };
 
 namespace sort_field_const {
diff --git a/src/field.cpp b/src/field.cpp
index c7297359..9b20aeef 100644
--- a/src/field.cpp
+++ b/src/field.cpp
@@ -983,3 +983,68 @@ void field::compact_nested_fields(tsl::htrie_map<char, field>& nested_fields) {
         nested_fields.erase_prefix(field_name + ".");
     }
 }
+
+// Intersects the doc ids of `a` and `b` into `result` and deep copies, for every common id, the
+// reference results stored against it. `result` is not modified when either operand is empty.
+// When both operands carry the same reference key, only `b`'s entry is copied: copying `a`'s first
+// (as done previously) leaked that copy the moment `b`'s entry replaced it.
+void filter_result_t::and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) {
+    auto lenA = a.count, lenB = b.count;
+    if (lenA == 0 || lenB == 0) {
+        return;
+    }
+
+    // The intersection can never hold more ids than the smaller operand.
+    const auto capacity = std::min(lenA, lenB);
+    result.docs = new uint32_t[capacity];
+
+    auto A = a.docs, B = b.docs, out = result.docs;
+    const uint32_t *endA = A + lenA;
+    const uint32_t *endB = B + lenB;
+
+    // Reserves one slot per potential common id for each reference key of `operand`.
+    const auto reserve_slots = [&result, capacity](const filter_result_t& operand) {
+        for (auto const& item: operand.reference_filter_results) {
+            if (result.reference_filter_results.count(item.first) == 0) {
+                result.reference_filter_results[item.first] = new reference_filter_result_t[capacity];
+            }
+        }
+    };
+    reserve_slots(a);
+    reserve_slots(b);
+
+    // Deep copies `from` into the slot of `key` that belongs to the id about to be written to `out`.
+    const auto copy_reference = [&result, &out](auto const& key, const reference_filter_result_t& from) {
+        auto& reference = result.reference_filter_results[key][out - result.docs];
+        reference.count = from.count;
+        reference.docs = new uint32_t[from.count];
+        // memcpy requires valid pointers even for a zero-byte copy, so skip it for empty entries.
+        if (from.count > 0) {
+            memcpy(reference.docs, from.docs, from.count * sizeof(uint32_t));
+        }
+    };
+
+    // Merge-style walk; assumes both id arrays are sorted ascending, as this routine always has.
+    while (A != endA && B != endB) {
+        if (*A < *B) {
+            ++A;
+        } else if (*A > *B) {
+            ++B;
+        } else {
+            *out = *A;
+            for (auto const& item: a.reference_filter_results) {
+                // A key shared with `b` is copied from `b` below; copying both would leak the first copy.
+                if (b.reference_filter_results.count(item.first) == 0) {
+                    copy_reference(item.first, item.second[A - a.docs]);
+                }
+            }
+            for (auto const& item: b.reference_filter_results) {
+                copy_reference(item.first, item.second[B - b.docs]);
+            }
+            ++out;
+            ++A;
+            ++B;
+        }
+    }
+    result.count = out - result.docs;
+}
diff --git a/src/index.cpp b/src/index.cpp
index 0e09cc8b..c81e4881 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -2158,7 +2158,6 @@ Option<bool> Index::recursive_filter(filter_node_t* const root,
             }
         }
 
-
         return Option(true);
     }