Add object_array_reference_index.

2025-05-21 22:33:27 +08:00 · 2023-11-09 16:15:24 +05:30 · 2023-11-09 16:15:24 +05:30 · 10a334cf07
commit 10a334cf07
parent aa9ff7d37e
7 changed files with 406 additions and 93 deletions
--- a/include/collection.h
+++ b/include/collection.h
@@ -320,9 +320,12 @@ private:

    Option<std::string> get_referenced_in_field(const std::string& collection_name) const;

-    Option<bool> get_related_ids(const std::string& ref_collection_name, const uint32_t& seq_id,
+    Option<bool> get_related_ids(const std::string& ref_field_name, const uint32_t& seq_id,
                                 std::vector<uint32_t>& result) const;

+    Option<bool> get_object_array_related_id(const std::string& ref_field_name,
+                                             const uint32_t& seq_id, const uint32_t& object_index,
+                                             uint32_t& result) const;

    void remove_embedding_field(const std::string& field_name);

@@ -430,15 +433,15 @@ public:

    static void remove_reference_helper_fields(nlohmann::json& document);

-    static Option<bool> add_reference_fields(nlohmann::json& doc,
-                                             const std::string& ref_collection_name,
-                                             Collection *const ref_collection,
-                                             const std::string& alias,
-                                             const reference_filter_result_t& references,
-                                             const tsl::htrie_set<char>& ref_include_fields_full,
-                                             const tsl::htrie_set<char>& ref_exclude_fields_full,
-                                             const std::string& error_prefix, const bool& is_reference_array,
-                                             const bool& nest_ref_doc);
+    static Option<bool> include_references(nlohmann::json& doc,
+                                           const std::string& ref_collection_name,
+                                           Collection *const ref_collection,
+                                           const std::string& alias,
+                                           const reference_filter_result_t& references,
+                                           const tsl::htrie_set<char>& ref_include_fields_full,
+                                           const tsl::htrie_set<char>& ref_exclude_fields_full,
+                                           const std::string& error_prefix, const bool& is_reference_array,
+                                           const bool& nest_ref_doc);

    static Option<bool> prune_doc(nlohmann::json& doc, const tsl::htrie_set<char>& include_names,
                                  const tsl::htrie_set<char>& exclude_names, const std::string& parent_name = "",
--- a/include/index.h
+++ b/include/index.h
@@ -335,6 +335,13 @@ struct Hasher32 {
    size_t operator()(uint32_t k) const { return (k ^ 2166136261U)  * 16777619UL; }
 };

+struct pair_hash {
+    template <class T1, class T2>
+    std::size_t operator() (const std::pair<T1, T2> &pair) const {
+        return std::hash<T1>()(pair.first) ^ std::hash<T2>()(pair.second);
+    }
+};
+
 class Index {
 private:
    mutable std::shared_mutex mutex;
@@ -361,6 +368,10 @@ private:
    // Only used when the reference field is an array type otherwise sort_index is used.
    spp::sparse_hash_map<std::string, num_tree_t*> reference_index;

+    /// field_name => ((doc_id, object_index) => ref_doc_id)
+    /// Used when a field inside an object array has a reference.
+    spp::sparse_hash_map<std::string, spp::sparse_hash_map<std::pair<uint32_t, uint32_t>, uint32_t, pair_hash>*> object_array_reference_index;
+
    spp::sparse_hash_map<std::string, NumericTrie*> range_index;

    spp::sparse_hash_map<std::string, NumericTrie*> geo_range_index;
@@ -1026,6 +1037,11 @@ public:
                                 const std::string& reference_helper_field_name,
                                 const uint32_t& seq_id, std::vector<uint32_t>& result) const;

+    Option<bool> get_object_array_related_id(const std::string& collection_name,
+                                             const std::string& reference_helper_field_name,
+                                             const uint32_t& seq_id, const uint32_t& object_index,
+                                             uint32_t& result) const;
+
    Option<uint32_t> get_sort_index_value_with_lock(const std::string& collection_name,
                                                    const std::string& field_name,
                                                    const uint32_t& seq_id) const;
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -202,16 +202,23 @@ Option<bool> Collection::add_reference_helper_fields(nlohmann::json& document, c
        if (is_object_reference_field && is_object_array) {
            if (!document[field_name].is_array()) {
                return Option<bool>(400, "Expected `" + field_name + "` to be an array.");
-            } else if (document[object_key].size() != document[field_name].size()) {
-                return Option<bool>(400, "Expected the same number of elements in `" + field_name + "` and `" +
-                                            object_key + "`.");
            }

            document[reference_helper_field] = nlohmann::json::array();
            nlohmann::json temp_doc; // To store singular values of `field_name` field.

-            for (const auto &item: document[field_name]) {
-                temp_doc[field_name] = item;
+            std::vector<std::string> keys;
+            StringUtils::split(field_name, keys, ".");
+            auto const& object_array = document[keys[0]];
+
+            for (uint32_t i = 0; i < object_array.size(); i++) {
+                if (optional && object_array[i].count(keys[1]) == 0) {
+                    continue;
+                } else if (object_array[i].count(keys[1]) == 0) {
+                    return Option<bool>(400, "Object at index `" + std::to_string(i) + "` is missing `" + field_name + "`.");
+                }
+
+                temp_doc[field_name] = object_array[i].at(keys[1]);
                auto single_value_filter_query_op = single_value_filter_query(temp_doc, field_name, ref_field_type,
                                                                              filter_query);
                if (!single_value_filter_query_op.ok()) {
@@ -233,7 +240,9 @@ Option<bool> Collection::add_reference_helper_fields(nlohmann::json& document, c
                                              reference_collection_name + "`.");
                }

-                document[reference_helper_field] += filter_result.docs[0];
+                // Store the index of the object along with the referenced doc id: the reference field of an object
+                // array might be optional and missing from some objects, so position alone can't identify the object.
+                document[reference_helper_field] += nlohmann::json::array({i, filter_result.docs[0]});
                filter_query = reference_field_name + ": ";
            }
            continue;
@@ -3380,6 +3389,12 @@ Option<bool> Collection::get_related_ids(const std::string& ref_field_name, cons
    return index->get_related_ids(name, ref_field_name, seq_id, result);
 }

+Option<bool> Collection::get_object_array_related_id(const std::string& ref_field_name,
+                                                     const uint32_t& seq_id, const uint32_t& object_index,
+                                                     uint32_t& result) const {
+    return index->get_object_array_related_id(name, ref_field_name, seq_id, object_index, result);
+}
+
 Option<bool> Collection::get_reference_filter_ids(const std::string & filter_query,
                                                  filter_result_t& filter_result,
                                                  const std::string& reference_field_name) const {
@@ -4807,15 +4822,15 @@ void Collection::remove_reference_helper_fields(nlohmann::json& document) {
    }
 }

-Option<bool> Collection::add_reference_fields(nlohmann::json& doc,
-                                              const std::string& ref_collection_name,
-                                              Collection *const ref_collection,
-                                              const std::string& alias,
-                                              const reference_filter_result_t& references,
-                                              const tsl::htrie_set<char>& ref_include_fields_full,
-                                              const tsl::htrie_set<char>& ref_exclude_fields_full,
-                                              const std::string& error_prefix, const bool& is_reference_array,
-                                              const bool& nest_ref_doc) {
+Option<bool> Collection::include_references(nlohmann::json& doc,
+                                            const std::string& ref_collection_name,
+                                            Collection *const ref_collection,
+                                            const std::string& alias,
+                                            const reference_filter_result_t& references,
+                                            const tsl::htrie_set<char>& ref_include_fields_full,
+                                            const tsl::htrie_set<char>& ref_exclude_fields_full,
+                                            const std::string& error_prefix, const bool& is_reference_array,
+                                            const bool& nest_ref_doc) {
    // One-to-one relation.
    if (!is_reference_array && references.count == 1) {
        auto ref_doc_seq_id = references.docs[0];
@@ -5046,7 +5061,7 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
            return Option<bool>(include_exclude_op.code(), error_prefix + include_exclude_op.error());
        }

-        Option<bool> add_reference_fields_op = Option<bool>(true);
+        Option<bool> include_references_op = Option<bool>(true);
        if (has_filter_reference) {
            auto get_reference_field_op = collection->get_referenced_in_field(ref_collection_name);
            if (!get_reference_field_op.ok()) {
@@ -5056,12 +5071,12 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
            if (ref_collection->search_schema.count(field_name) == 0) {
                continue;
            }
-            add_reference_fields_op = add_reference_fields(doc, ref_include.collection_name,
-                                                           ref_collection.get(), ref_include.alias,
-                                                           reference_filter_results.at(ref_collection_name),
-                                                           ref_include_fields_full, ref_exclude_fields_full, error_prefix,
-                                                           ref_collection->get_schema().at(field_name).is_array(),
-                                                           ref_include.nest_ref_doc);
+            include_references_op = include_references(doc, ref_include.collection_name,
+                                                       ref_collection.get(), ref_include.alias,
+                                                       reference_filter_results.at(ref_collection_name),
+                                                       ref_include_fields_full, ref_exclude_fields_full, error_prefix,
+                                                       ref_collection->get_schema().at(field_name).is_array(),
+                                                       ref_include.nest_ref_doc);
        } else if (doc_has_reference) {
            auto get_reference_field_op = ref_collection->get_referenced_in_field_with_lock(collection->name);
            if (!get_reference_field_op.ok()) {
@@ -5072,13 +5087,6 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
                continue;
            }

-            reference_filter_result_t result;
-            std::vector<uint32_t> ids;
-            auto get_references_op = collection->get_related_ids(field_name, seq_id, ids);
-            if (!get_references_op.ok()) {
-                continue;
-            }
-
            if (collection->object_reference_helper_fields.count(field_name) != 0) {
                std::vector<std::string> keys;
                StringUtils::split(field_name, keys, ".");
@@ -5087,34 +5095,55 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
                                             "` in the document to include the referenced document.");
                }

-                result.count = ids.size();
-                result.docs = &ids[0];
                if (doc[keys[0]].is_array()) {
-                    for (uint32_t i = 0; i < result.count; i++) {
-                        add_reference_fields_op = add_reference_fields(doc[keys[0]][i], ref_include.collection_name,
-                                                                       ref_collection.get(), ref_include.alias,
-                                                                       reference_filter_result_t(1, new uint32_t[1] {result.docs[i]}),
-                                                                       ref_include_fields_full, ref_exclude_fields_full, error_prefix,
-                                                                       false, ref_include.nest_ref_doc);
-                    }
-                } else {
-                    add_reference_fields_op = add_reference_fields(doc[keys[0]], ref_include.collection_name,
+                    for (uint32_t i = 0; i < doc[keys[0]].size(); i++) {
+                        uint32_t ref_doc_id;
+                        auto op = collection->get_object_array_related_id(field_name, seq_id, i, ref_doc_id);
+                        if (!op.ok()) {
+                            if (op.code() == 404) { // field_name is not indexed.
+                                break;
+                            } else { // No reference found for this object.
+                                continue;
+                            }
+                        }
+
+                        reference_filter_result_t result(1, new uint32_t[1]{ref_doc_id});
+                        include_references_op = include_references(doc[keys[0]][i], ref_include.collection_name,
                                                                   ref_collection.get(), ref_include.alias, result,
                                                                   ref_include_fields_full, ref_exclude_fields_full, error_prefix,
-                                                                   collection->search_schema.at(field_name).is_array(),
-                                                                   ref_include.nest_ref_doc);
-                }
-            } else {
-                result.count = ids.size();
-                result.docs = &ids[0];
-                add_reference_fields_op = add_reference_fields(doc, ref_include.collection_name,
+                                                                   false, ref_include.nest_ref_doc);
+                        if (!include_references_op.ok()) {
+                            return include_references_op;
+                        }
+                    }
+                } else {
+                    std::vector<uint32_t> ids;
+                    auto get_references_op = collection->get_related_ids(field_name, seq_id, ids);
+                    if (!get_references_op.ok()) {
+                        continue;
+                    }
+                    reference_filter_result_t result(ids.size(), &ids[0]);
+                    include_references_op = include_references(doc[keys[0]], ref_include.collection_name,
                                                               ref_collection.get(), ref_include.alias, result,
                                                               ref_include_fields_full, ref_exclude_fields_full, error_prefix,
                                                               collection->search_schema.at(field_name).is_array(),
                                                               ref_include.nest_ref_doc);
+                    result.docs = nullptr;
+                }
+            } else {
+                std::vector<uint32_t> ids;
+                auto get_references_op = collection->get_related_ids(field_name, seq_id, ids);
+                if (!get_references_op.ok()) {
+                    continue;
+                }
+                reference_filter_result_t result(ids.size(), &ids[0]);
+                include_references_op = include_references(doc, ref_include.collection_name,
+                                                           ref_collection.get(), ref_include.alias, result,
+                                                           ref_include_fields_full, ref_exclude_fields_full, error_prefix,
+                                                           collection->search_schema.at(field_name).is_array(),
+                                                           ref_include.nest_ref_doc);
+                result.docs = nullptr;
            }
-
-            result.docs = nullptr;
        } else if (joined_coll_has_reference) {
            auto joined_collection = cm.get_collection(joined_coll_having_reference);
            if (joined_collection == nullptr) {
@@ -5144,16 +5173,16 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
            reference_filter_result_t result;
            result.count = ids.size();
            result.docs = &ids[0];
-            add_reference_fields_op = add_reference_fields(doc, ref_include.collection_name,
-                                                           ref_collection.get(), ref_include.alias, result,
-                                                           ref_include_fields_full, ref_exclude_fields_full, error_prefix,
-                                                           joined_collection->get_schema().at(reference_field_name).is_array(),
-                                                           ref_include.nest_ref_doc);
+            include_references_op = include_references(doc, ref_include.collection_name,
+                                                       ref_collection.get(), ref_include.alias, result,
+                                                       ref_include_fields_full, ref_exclude_fields_full, error_prefix,
+                                                       joined_collection->get_schema().at(reference_field_name).is_array(),
+                                                       ref_include.nest_ref_doc);
            result.docs = nullptr;
        }

-        if (!add_reference_fields_op.ok()) {
-            return add_reference_fields_op;
+        if (!include_references_op.ok()) {
+            return include_references_op;
        }
    }

--- a/src/field.cpp
+++ b/src/field.cpp
@@ -335,11 +335,11 @@ Option<bool> field::json_field_to_field(bool enable_nested_fields, nlohmann::jso
    if (!field_json[fields::reference].get<std::string>().empty()) {
        // Add a reference helper field in the schema. It stores the doc id of the document it references to reduce the
        // computation while searching.
-        the_fields.emplace_back(
-                field(field_json[fields::name].get<std::string>() + fields::REFERENCE_HELPER_FIELD_SUFFIX,
-                      field_types::is_array(field_json[fields::type].get<std::string>()) ? field_types::INT64_ARRAY : field_types::INT64,
-                      false, field_json[fields::optional], true)
-        );
+        auto f = field(field_json[fields::name].get<std::string>() + fields::REFERENCE_HELPER_FIELD_SUFFIX,
+                       field_types::is_array(field_json[fields::type].get<std::string>()) ? field_types::INT64_ARRAY : field_types::INT64,
+                       false, field_json[fields::optional], true);
+        f.nested = field_json[fields::nested];
+        the_fields.emplace_back(std::move(f));
    }

    return Option<bool>(true);
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -129,6 +129,17 @@ Index::Index(const std::string& name, const uint32_t collection_id, const Store*
        if (a_field.is_reference_helper && a_field.is_array()) {
            auto num_tree = new num_tree_t;
            reference_index.emplace(a_field.name, num_tree);
+
+            if (a_field.nested) {
+                std::vector<std::string> keys;
+                StringUtils::split(a_field.name, keys, ".");
+
+                // `object_array_reference_index` only includes the reference fields that are part of an object array.
+                if (search_schema.count(keys[0]) != 0 && search_schema.at(keys[0]).is_array()) {
+                    auto index = new spp::sparse_hash_map<std::pair<uint32_t, uint32_t>, uint32_t, pair_hash>();
+                    object_array_reference_index.emplace(a_field.name, index);
+                }
+            }
        }
    }

@@ -216,6 +227,13 @@ Index::~Index() {
    }

    reference_index.clear();
+
+    for(auto & name_tree: object_array_reference_index) {
+        delete name_tree.second;
+        name_tree.second = nullptr;
+    }
+
+    object_array_reference_index.clear();
 }

 int64_t Index::get_points_from_doc(const nlohmann::json &document, const std::string & default_sorting_field) {
@@ -1008,8 +1026,10 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
            // all other numerical arrays
            auto num_tree = afield.range_index ? nullptr : numerical_index.at(afield.name);
            auto trie = afield.range_index ? range_index.at(afield.name) : nullptr;
-            auto reference = afield.is_reference_helper ? reference_index.at(afield.name) : nullptr;
-            iterate_and_index_numerical_field(iter_batch, afield, [&afield, num_tree, trie, reference]
+            auto reference = reference_index.count(afield.name) != 0 ? reference_index.at(afield.name) : nullptr;
+            auto object_array_reference = object_array_reference_index.count(afield.name) != 0 ?
+                                                                object_array_reference_index.at(afield.name) : nullptr;
+            iterate_and_index_numerical_field(iter_batch, afield, [&afield, num_tree, trie, reference, object_array_reference]
                    (const index_record& record, uint32_t seq_id) {
                for(size_t arr_i = 0; arr_i < record.doc[afield.name].size(); arr_i++) {
                    const auto& arr_value = record.doc[afield.name][arr_i];
@@ -1024,7 +1044,13 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
                    }

                    else if(afield.type == field_types::INT64_ARRAY) {
-                        const int64_t value = arr_value;
+                        int64_t value;
+                        if (object_array_reference != nullptr) { // arr_value is an array [object_index, value]
+                            value = arr_value.at(1);
+                        } else {
+                            value = arr_value;
+                        }
+
                        if (afield.range_index) {
                            trie->insert(value, seq_id);
                        } else {
@@ -1033,6 +1059,9 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
                        if (reference != nullptr) {
                            reference->insert(seq_id, value);
                        }
+                        if (object_array_reference != nullptr) {
+                            (*object_array_reference)[std::make_pair(seq_id, arr_value.at(0))] = value;
+                        }
                    }

                    else if(afield.type == field_types::FLOAT_ARRAY) {
@@ -1795,6 +1824,9 @@ Option<bool> Index::do_reference_filtering_with_lock(filter_node_t* const filter
        auto const& ref_index = *sort_index.at(reference_helper_field_name);
        for (uint32_t i = 0; i < count; i++) {
            auto& reference_doc_id = reference_docs[i];
+            if (ref_index.count(reference_doc_id) == 0) { // Reference field might be optional.
+                continue;
+            }
            auto doc_id = ref_index.at(reference_doc_id);

            id_pairs.emplace_back(std::pair(doc_id, reference_doc_id));
@@ -7117,6 +7149,23 @@ Option<bool> Index::get_related_ids(const std::string& collection_name, const st
    return Option<bool>(true);
 }

+Option<bool> Index::get_object_array_related_id(const std::string& collection_name,
+                                                const std::string& field_name,
+                                                const uint32_t& seq_id, const uint32_t& object_index,
+                                                uint32_t& result) const {
+    std::shared_lock lock(mutex);
+    if (object_array_reference_index.count(field_name) == 0 || object_array_reference_index.at(field_name) == nullptr) {
+        return Option<bool>(404, "`" + field_name + "` not found in `" + collection_name +
+                                    ".object_array_reference_index`");
+    } else if (object_array_reference_index.at(field_name)->count({seq_id, object_index}) == 0) {
+        return Option<bool>(400, "Key `{" + std::to_string(seq_id) + ", " + std::to_string(object_index) + "}`"
+                                    " not found in `" + collection_name + ".object_array_reference_index`");
+    }
+
+    result = object_array_reference_index.at(field_name)->at({seq_id, object_index});
+    return Option<bool>(true);
+}
+
 Option<uint32_t> Index::get_sort_index_value_with_lock(const std::string& collection_name,
                                                       const std::string& field_name,
                                                       const uint32_t& seq_id) const {
--- a/src/validator.cpp
+++ b/src/validator.cpp
@@ -359,6 +359,13 @@ Option<uint32_t> validator_t::coerce_int64_t(const DIRTY_VALUES& dirty_values, c
        item = std::atoll(item.get<std::string>().c_str());
    }

+    else if(is_array) {
+        if(!a_field.nested || !a_field.is_reference_helper || item.size() != 2 ||
+            !item.at(0).is_number() || !item.at(1).is_number()) {
+            return Option<>(400, "Expected `" + field_name + "` to be an object array reference helper field.");
+        }
+    }
+
    else {
        if(dirty_values == DIRTY_VALUES::COERCE_OR_DROP) {
            if(!a_field.optional) {
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -962,8 +962,12 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
    doc = coll2->get("3").get();
    ASSERT_EQ(1, doc.count("object_array.ref_array_field_sequence_id"));
    ASSERT_EQ(2, doc["object_array.ref_array_field_sequence_id"].size());
-    ASSERT_EQ(0, doc["object_array.ref_array_field_sequence_id"][0]);
-    ASSERT_EQ(1, doc["object_array.ref_array_field_sequence_id"][1]);
+    ASSERT_EQ(2, doc["object_array.ref_array_field_sequence_id"][0].size());
+    ASSERT_EQ(0, doc["object_array.ref_array_field_sequence_id"][0][0]);
+    ASSERT_EQ(0, doc["object_array.ref_array_field_sequence_id"][0][1]);
+    ASSERT_EQ(2, doc["object_array.ref_array_field_sequence_id"][1].size());
+    ASSERT_EQ(1, doc["object_array.ref_array_field_sequence_id"][1][0]);
+    ASSERT_EQ(1, doc["object_array.ref_array_field_sequence_id"][1][1]);
    ASSERT_EQ(1, doc.count(".ref"));
    ASSERT_EQ(1, doc[".ref"].size());
    ASSERT_EQ("object_array.ref_array_field_sequence_id", doc[".ref"][0]);
@@ -2763,6 +2767,206 @@ TEST_F(CollectionJoinTest, FilterByReferenceArrayField) {

 TEST_F(CollectionJoinTest, FilterByObjectReferenceField) {
    auto schema_json =
+            R"({
+                "name": "Products",
+                "fields": [
+                    {"name": "product_id", "type": "string"},
+                    {"name": "price", "type": "int32"},
+                    {"name": "name", "type": "string"}
+                ]
+            })"_json;
+    std::vector<nlohmann::json> documents = {
+            R"({
+                "product_id": "product_a",
+                "price": 50,
+                "name": "soap"
+            })"_json,
+            R"({
+                "product_id": "product_b",
+                "price": 10,
+                "name": "shampoo"
+            })"_json,
+            R"({
+                "product_id": "product_c",
+                "price": 120,
+                "name": "milk"
+            })"_json
+    };
+
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    schema_json =
+            R"({
+                "name": "coll1",
+                "fields": [
+                    {"name": "coll_id", "type": "string"},
+                    {"name": "object.reference", "type": "string", "reference": "Products.product_id", "optional": true},
+                    {"name": "object", "type": "object"}
+                ],
+                "enable_nested_fields": true
+            })"_json;
+    documents = {
+            R"({
+                "coll_id": "a",
+                "object": {}
+            })"_json,
+            R"({
+                "coll_id": "b",
+                "object": {
+                    "reference": "product_c"
+                }
+            })"_json
+    };
+
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    std::map<std::string, std::string> req_params = {
+            {"collection", "coll1"},
+            {"q", "*"},
+            {"include_fields", "$Products(product_id)"}
+    };
+    nlohmann::json embedded_params;
+    std::string json_res;
+    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::system_clock::now().time_since_epoch()).count();
+
+    auto search_op_bool = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_TRUE(search_op_bool.ok());
+
+    auto res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(2, res_obj["found"].get<size_t>());
+    ASSERT_EQ(2, res_obj["hits"].size());
+    ASSERT_EQ(3, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ("b", res_obj["hits"][0]["document"]["coll_id"]);
+    ASSERT_EQ(2, res_obj["hits"][0]["document"]["object"].size());
+    ASSERT_EQ("product_c", res_obj["hits"][0]["document"]["object"]["reference"]);
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["object"].count("Products"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["object"]["Products"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["object"]["Products"].count("product_id"));
+    ASSERT_EQ("product_c", res_obj["hits"][0]["document"]["object"]["Products"]["product_id"]);
+    ASSERT_EQ(3, res_obj["hits"][1]["document"].size());
+    ASSERT_EQ("a", res_obj["hits"][1]["document"]["coll_id"]);
+    ASSERT_EQ(0, res_obj["hits"][1]["document"]["object"].size());
+
+    req_params = {
+            {"collection", "Products"},
+            {"q", "*"},
+            {"filter_by", "$coll1(id: *)"},
+            {"include_fields", "$coll1(coll_id)"}
+    };
+    search_op_bool = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_TRUE(search_op_bool.ok());
+
+    res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(1, res_obj["found"].get<size_t>());
+    ASSERT_EQ(1, res_obj["hits"].size());
+    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ("product_c", res_obj["hits"][0]["document"]["product_id"]);
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("coll1"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["coll1"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["coll1"].count("coll_id"));
+    ASSERT_EQ("b", res_obj["hits"][0]["document"]["coll1"]["coll_id"]);
+
+    schema_json =
+            R"({
+                "name": "coll2",
+                "fields": [
+                    {"name": "coll_id", "type": "string"},
+                    {"name": "object.reference_array", "type": "string[]", "reference": "Products.product_id", "optional": true},
+                    {"name": "object", "type": "object"}
+                ],
+                "enable_nested_fields": true
+            })"_json;
+    documents = {
+            R"({
+                "coll_id": "a",
+                "object": {}
+            })"_json,
+            R"({
+                "coll_id": "b",
+                "object": {
+                    "reference_array": ["product_a", "product_b"]
+                }
+            })"_json
+    };
+
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    req_params = {
+            {"collection", "coll2"},
+            {"q", "*"},
+            {"include_fields", "$Products(product_id)"}
+    };
+    search_op_bool = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_TRUE(search_op_bool.ok());
+
+    res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(2, res_obj["found"].get<size_t>());
+    ASSERT_EQ(2, res_obj["hits"].size());
+    ASSERT_EQ(3, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ("b", res_obj["hits"][0]["document"]["coll_id"]);
+    ASSERT_EQ(2, res_obj["hits"][0]["document"]["object"].size());
+    ASSERT_EQ("product_a", res_obj["hits"][0]["document"]["object"]["reference_array"][0]);
+    ASSERT_EQ("product_b", res_obj["hits"][0]["document"]["object"]["reference_array"][1]);
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["object"].count("Products"));
+    ASSERT_EQ(2, res_obj["hits"][0]["document"]["object"]["Products"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["object"]["Products"][0].count("product_id"));
+    ASSERT_EQ("product_a", res_obj["hits"][0]["document"]["object"]["Products"][0]["product_id"]);
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["object"]["Products"][1].count("product_id"));
+    ASSERT_EQ("product_b", res_obj["hits"][0]["document"]["object"]["Products"][1]["product_id"]);
+    ASSERT_EQ(3, res_obj["hits"][1]["document"].size());
+    ASSERT_EQ("a", res_obj["hits"][1]["document"]["coll_id"]);
+    ASSERT_EQ(0, res_obj["hits"][1]["document"]["object"].size());
+
+    req_params = {
+            {"collection", "Products"},
+            {"q", "*"},
+            {"filter_by", "$coll2(id: *)"},
+            {"include_fields", "$coll2(coll_id)"}
+    };
+    search_op_bool = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_TRUE(search_op_bool.ok());
+
+    res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(2, res_obj["found"].get<size_t>());
+    ASSERT_EQ(2, res_obj["hits"].size());
+    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ("product_b", res_obj["hits"][0]["document"]["product_id"]);
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("coll2"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["coll2"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["coll2"][0].count("coll_id"));
+    ASSERT_EQ("b", res_obj["hits"][0]["document"]["coll2"][0]["coll_id"]);
+    ASSERT_EQ("product_a", res_obj["hits"][1]["document"]["product_id"]);
+    ASSERT_EQ(1, res_obj["hits"][1]["document"].count("coll2"));
+    ASSERT_EQ(1, res_obj["hits"][1]["document"]["coll2"].size());
+    ASSERT_EQ(1, res_obj["hits"][1]["document"]["coll2"][0].count("coll_id"));
+    ASSERT_EQ("b", res_obj["hits"][1]["document"]["coll2"][0]["coll_id"]);
+
+    schema_json =
            R"({
                "name": "Portions",
                "fields": [
@ -2771,7 +2975,7 @@ TEST_F(CollectionJoinTest, FilterByObjectReferenceField) {
                    {"name": "unit", "type": "string"}
                ]
            })"_json;
-    std::vector<nlohmann::json> documents = {
+    documents = {
            R"({
                "portion_id": "portion_a",
                "quantity": 500,
@ -2789,7 +2993,7 @@ TEST_F(CollectionJoinTest, FilterByObjectReferenceField) {
            })"_json
    };

-    auto collection_create_op = collectionManager.create_collection(schema_json);
+    collection_create_op = collectionManager.create_collection(schema_json);
    ASSERT_TRUE(collection_create_op.ok());
    for (auto const &json: documents) {
        auto add_op = collection_create_op.get()->add(json.dump());
@ -2805,7 +3009,7 @@ TEST_F(CollectionJoinTest, FilterByObjectReferenceField) {
                "fields": [
                    {"name": "name", "type": "string"},
                    {"name": "portions", "type": "object[]"},
-                    {"name": "portions.portion_id", "type": "string[]", "reference": "Portions.portion_id"}
+                    {"name": "portions.portion_id", "type": "string[]", "reference": "Portions.portion_id", "optional": true}
                ],
                "enable_nested_fields": true
            })"_json;
@ -2826,6 +3030,9 @@ TEST_F(CollectionJoinTest, FilterByObjectReferenceField) {
                        "portion_id": "portion_b",
                        "count": 3
                    },
+                    {
+                        "count": 3
+                    },
                    {
                        "portion_id": "portion_c",
                        "count": 1
@ -2844,20 +3051,15 @@ TEST_F(CollectionJoinTest, FilterByObjectReferenceField) {
        ASSERT_TRUE(add_op.ok());
    }

-    std::map<std::string, std::string> req_params = {
+    req_params = {
            {"collection", "Foods"},
            {"q", "*"},
            {"include_fields", "$Portions(*:merge)"}
    };
-    nlohmann::json embedded_params;
-    std::string json_res;
-    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
-            std::chrono::system_clock::now().time_since_epoch()).count();
-
-    auto search_op_bool = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    search_op_bool = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
    ASSERT_TRUE(search_op_bool.ok());

-    auto res_obj = nlohmann::json::parse(json_res);
+    res_obj = nlohmann::json::parse(json_res);
    ASSERT_EQ(2, res_obj["found"].get<size_t>());
    ASSERT_EQ(2, res_obj["hits"].size());
    ASSERT_EQ(3, res_obj["hits"][0]["document"].size());
@ -2865,21 +3067,28 @@ TEST_F(CollectionJoinTest, FilterByObjectReferenceField) {

    ASSERT_EQ("Milk", res_obj["hits"][0]["document"]["name"]);
    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("portions"));
-    ASSERT_EQ(2, res_obj["hits"][0]["document"]["portions"].size());
+    ASSERT_EQ(3, res_obj["hits"][0]["document"]["portions"].size());
+
    ASSERT_EQ(5, res_obj["hits"][0]["document"]["portions"][0].size());
    ASSERT_EQ("portion_b", res_obj["hits"][0]["document"]["portions"][0].at("portion_id"));
    ASSERT_EQ(1 , res_obj["hits"][0]["document"]["portions"][0].at("quantity"));
    ASSERT_EQ("lt", res_obj["hits"][0]["document"]["portions"][0].at("unit"));
    ASSERT_EQ(3 , res_obj["hits"][0]["document"]["portions"][0].at("count"));
-    ASSERT_EQ(5, res_obj["hits"][0]["document"]["portions"][1].size());
-    ASSERT_EQ("portion_c", res_obj["hits"][0]["document"]["portions"][1].at("portion_id"));
-    ASSERT_EQ(500 , res_obj["hits"][0]["document"]["portions"][1].at("quantity"));
-    ASSERT_EQ("ml", res_obj["hits"][0]["document"]["portions"][1].at("unit"));
-    ASSERT_EQ(1 , res_obj["hits"][0]["document"]["portions"][1].at("count"));
+
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["portions"][1].size());
+    ASSERT_EQ(3 , res_obj["hits"][0]["document"]["portions"][1].at("count"));
+
+    ASSERT_EQ(5, res_obj["hits"][0]["document"]["portions"][2].size());
+    ASSERT_EQ("portion_c", res_obj["hits"][0]["document"]["portions"][2].at("portion_id"));
+    ASSERT_EQ(500 , res_obj["hits"][0]["document"]["portions"][2].at("quantity"));
+    ASSERT_EQ("ml", res_obj["hits"][0]["document"]["portions"][2].at("unit"));
+    ASSERT_EQ(1 , res_obj["hits"][0]["document"]["portions"][2].at("count"));
+

    ASSERT_EQ("Bread", res_obj["hits"][1]["document"]["name"]);
    ASSERT_EQ(1, res_obj["hits"][1]["document"].count("portions"));
    ASSERT_EQ(1, res_obj["hits"][1]["document"]["portions"].size());
+
    ASSERT_EQ(5, res_obj["hits"][1]["document"]["portions"][0].size());
    ASSERT_EQ("portion_a", res_obj["hits"][1]["document"]["portions"][0].at("portion_id"));
    ASSERT_EQ(500 , res_obj["hits"][1]["document"]["portions"][0].at("quantity"));