From 81af3643f7a09c687196f47174efa2fa383e3ba2 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Wed, 13 Sep 2023 13:09:59 +0530
Subject: [PATCH 1/2] Fix `"include_fields": "*, "` only including indexed
 fields in the response.

---
 include/collection.h              |  6 +++--
 src/collection.cpp                | 19 ++++++++-------
 src/collection_manager.cpp        | 39 ++++++++++++++++++-------------
 test/collection_join_test.cpp     | 23 ++++++++++--------
 test/collection_specific_test.cpp | 30 ++++++++++++++++++++++++
 5 files changed, 80 insertions(+), 37 deletions(-)
diff --git a/include/collection.h b/include/collection.h
index bcec8ba7..9d2202bc 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -395,7 +395,8 @@ public:
                                   const tsl::htrie_set<char>& exclude_names, const std::string& parent_name = "",
                                   size_t depth = 0,
                                   const std::map<std::string, reference_filter_result_t>& reference_filter_results = {},
-                                  Collection *const collection = nullptr, const uint32_t& seq_id = 0);
+                                  Collection *const collection = nullptr, const uint32_t& seq_id = 0,
+                                  const std::vector<std::string>& ref_include_fields_vec = {});
 
     const Index* _get_index() const;
 
@@ -494,7 +495,8 @@ public:
                                   const size_t remote_embedding_timeout_ms = 30000,
                                   const size_t remote_embedding_num_tries = 2,
                                   const std::string& stopwords_set="",
-                                  const std::vector<std::string>& facet_return_parent = {}) const;
+                                  const std::vector<std::string>& facet_return_parent = {},
+                                  const std::vector<std::string>& ref_include_fields_vec = {}) const;
 
     Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;
 
diff --git a/src/collection.cpp b/src/collection.cpp
index 79172751..192fd036 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1388,7 +1388,8 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
                                   const size_t remote_embedding_timeout_ms,
                                   const size_t remote_embedding_num_tries,
                                   const std::string& stopwords_set,
-                                  const std::vector<std::string>& facet_return_parent) const {
+                                  const std::vector<std::string>& facet_return_parent,
+                                  const std::vector<std::string>& ref_include_fields_vec) const {
 
     std::shared_lock lock(mutex);
 
@@ -2214,7 +2215,8 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
                                       "",
                                       0,
                                       field_order_kv->reference_filter_results,
-                                      const_cast<Collection *>(this), get_seq_id_from_key(seq_id_key));
+                                      const_cast<Collection *>(this), get_seq_id_from_key(seq_id_key),
+                                      ref_include_fields_vec);
             if (!prune_op.ok()) {
                 return Option<nlohmann::json>(prune_op.code(), prune_op.error());
             }
@@ -4364,7 +4366,8 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
                                    const tsl::htrie_set<char>& exclude_names,
                                    const std::string& parent_name, size_t depth,
                                    const std::map<std::string, reference_filter_result_t>& reference_filter_results,
-                                   Collection *const collection, const uint32_t& seq_id) {
+                                   Collection *const collection, const uint32_t& seq_id,
+                                   const std::vector<std::string>& ref_includes) {
     // doc can only be an object
     auto it = doc.begin();
     while(it != doc.end()) {
@@ -4440,9 +4443,7 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
         it++;
     }
 
-    auto include_reference_it_pair = include_names.equal_prefix_range("$");
-    for (auto reference = include_reference_it_pair.first; reference != include_reference_it_pair.second; reference++) {
-        auto ref = reference.key();
+    for (auto const& ref: ref_includes) {
         size_t parenthesis_index = ref.find('(');
 
         auto ref_collection_name = ref.substr(1, parenthesis_index - 1);
@@ -4484,9 +4485,9 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
         StringUtils::split(reference_fields, ref_include_fields_vec, ",");
         auto exclude_reference_it = exclude_names.equal_prefix_range("$" + ref_collection_name);
         if (exclude_reference_it.first != exclude_reference_it.second) {
-            ref = exclude_reference_it.first.key();
-            parenthesis_index = ref.find('(');
-            reference_fields = ref.substr(parenthesis_index + 1, ref.size() - parenthesis_index - 2);
+            auto ref_exclude = exclude_reference_it.first.key();
+            parenthesis_index = ref_exclude.find('(');
+            reference_fields = ref_exclude.substr(parenthesis_index + 1, ref_exclude.size() - parenthesis_index - 2);
             StringUtils::split(reference_fields, ref_exclude_fields_vec, ",");
         }
 
diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp
index 47f613e8..0c1793ea 100644
--- a/src/collection_manager.cpp
+++ b/src/collection_manager.cpp
@@ -836,21 +836,22 @@ void CollectionManager::_get_reference_collection_names(const std::string& filte
     }
 }
 
-void initialize_include_fields_vec(const std::string& filter_query, std::vector<std::string>& include_fields_vec) {
-    if (filter_query.empty()) {
-        return;
-    }
-
+// Separate out the reference includes into `ref_include_fields_vec`.
+void initialize_ref_include_fields_vec(const std::string& filter_query, std::vector<std::string>& include_fields_vec,
+                                       std::vector<std::string>& ref_include_fields_vec) {
     std::set<std::string> reference_collection_names;
     CollectionManager::_get_reference_collection_names(filter_query, reference_collection_names);
-    if (reference_collection_names.empty()) {
-        return;
-    }
 
-    bool non_reference_include_found = false;
+    std::vector<std::string> result_include_fields_vec;
+    auto wildcard_include_all = true;
     for (auto include_field: include_fields_vec) {
         if (include_field[0] != '$') {
-            non_reference_include_found = true;
+            if (include_field == "*") {
+                continue;
+            }
+
+            wildcard_include_all = false;
+            result_include_fields_vec.emplace_back(include_field);
             continue;
         }
 
@@ -865,19 +866,23 @@ void initialize_include_fields_vec(const std::string& filter_query, std::vector<
             continue;
         }
 
-        // Referenced collection in filter_query is already mentioned in include_fields.
+        ref_include_fields_vec.emplace_back(include_field);
+
+        // Referenced collection in filter_query is already mentioned in ref_include_fields.
         reference_collection_names.erase(reference_collection_name);
     }
 
     // Get all the fields of the referenced collection in the filter but not mentioned in include_fields.
     for (const auto &reference_collection_name: reference_collection_names) {
-        include_fields_vec.emplace_back("$" + reference_collection_name + "(*)");
+        ref_include_fields_vec.emplace_back("$" + reference_collection_name + "(*)");
     }
 
     // Since no field of the collection is mentioned in include_fields, get all the fields.
-    if (!include_fields_vec.empty() && !non_reference_include_found) {
-        include_fields_vec.emplace_back("*");
+    if (wildcard_include_all) {
+        result_include_fields_vec.clear();
     }
+
+    include_fields_vec = std::move(result_include_fields_vec);
 }
 
 Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& req_params,
@@ -1043,6 +1048,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
 
     std::vector<std::string> include_fields_vec;
     std::vector<std::string> exclude_fields_vec;
+    std::vector<std::string> ref_include_fields_vec;
     spp::sparse_hash_set<std::string> include_fields;
     spp::sparse_hash_set<std::string> exclude_fields;
 
@@ -1235,7 +1241,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
         per_page = 0;
     }
 
-    initialize_include_fields_vec(filter_query, include_fields_vec);
+    initialize_ref_include_fields_vec(filter_query, include_fields_vec, ref_include_fields_vec);
 
     include_fields.insert(include_fields_vec.begin(), include_fields_vec.end());
     exclude_fields.insert(exclude_fields_vec.begin(), exclude_fields_vec.end());
@@ -1326,7 +1332,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
                                                           remote_embedding_timeout_ms,
                                                           remote_embedding_num_tries,
                                                           stopwords_set,
-                                                          facet_return_parent);
+                                                          facet_return_parent,
+                                                          ref_include_fields_vec);
 
     uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
             std::chrono::high_resolution_clock::now() - begin).count();
diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index c4584bc9..e1a6e2e4 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -1167,7 +1167,8 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference) {
     ASSERT_EQ(1, res_obj["found"].get<size_t>());
     ASSERT_EQ(1, res_obj["hits"].size());
     // No fields are mentioned in `include_fields`, should include all fields of Products and Customers by default.
-    ASSERT_EQ(8, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ(9, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("id"));
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id"));
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_name"));
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_description"));
@@ -1191,7 +1192,8 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference) {
     ASSERT_EQ(1, res_obj["found"].get<size_t>());
     ASSERT_EQ(1, res_obj["hits"].size());
     // No fields of Products collection are mentioned in `include_fields`, should include all of its fields by default.
-    ASSERT_EQ(4, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("id"));
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id"));
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_name"));
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_description"));
@@ -1210,7 +1212,7 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference) {
     res_obj = nlohmann::json::parse(json_res);
     ASSERT_EQ(1, res_obj["found"].get<size_t>());
     ASSERT_EQ(1, res_obj["hits"].size());
-    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ(6, res_obj["hits"][0]["document"].size());
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
     ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
 
@@ -1227,7 +1229,7 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference) {
     res_obj = nlohmann::json::parse(json_res);
     ASSERT_EQ(1, res_obj["found"].get<size_t>());
     ASSERT_EQ(1, res_obj["hits"].size());
-    ASSERT_EQ(6, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ(7, res_obj["hits"][0]["document"].size());
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
     ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("customer_id"));
@@ -1246,8 +1248,8 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference) {
     res_obj = nlohmann::json::parse(json_res);
     ASSERT_EQ(1, res_obj["found"].get<size_t>());
     ASSERT_EQ(1, res_obj["hits"].size());
-    // 4 fields in Products document and 2 fields from Customers document
-    ASSERT_EQ(6, res_obj["hits"][0]["document"].size());
+    // 5 fields in Products document and 2 fields from Customers document
+    ASSERT_EQ(7, res_obj["hits"][0]["document"].size());
 
     req_params = {
             {"collection", "Products"},
@@ -1262,8 +1264,9 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference) {
     res_obj = nlohmann::json::parse(json_res);
     ASSERT_EQ(1, res_obj["found"].get<size_t>());
     ASSERT_EQ(1, res_obj["hits"].size());
-    // 4 fields in Products document and 2 fields from Customers document
-    ASSERT_EQ(6, res_obj["hits"][0]["document"].size());
+    // 5 fields in Products document and 2 fields from Customers document
+    ASSERT_EQ(7, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id_sequence_id"));
 
     req_params = {
@@ -1280,8 +1283,8 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference) {
     res_obj = nlohmann::json::parse(json_res);
     ASSERT_EQ(1, res_obj["found"].get<size_t>());
     ASSERT_EQ(1, res_obj["hits"].size());
-    // 4 fields in Products document and 1 fields from Customers document
-    ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
+    // 5 fields in Products document and 1 field from Customers document
+    ASSERT_EQ(6, res_obj["hits"][0]["document"].size());
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id"));
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_name"));
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_description"));
diff --git a/test/collection_specific_test.cpp b/test/collection_specific_test.cpp
index 6c95c2f9..38764660 100644
--- a/test/collection_specific_test.cpp
+++ b/test/collection_specific_test.cpp
@@ -2879,6 +2879,36 @@ TEST_F(CollectionSpecificTest, NonIndexField) {
     ASSERT_EQ(1, results["hits"].size());
     ASSERT_EQ(1, coll1->_get_index()->_get_search_index().size());
 
+    std::map<std::string, std::string> req_params = {
+            {"collection", "coll1"},
+            {"q", "*"},
+            {"include_fields", "*, "}
+    };
+    nlohmann::json embedded_params;
+    std::string json_res;
+    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::system_clock::now().time_since_epoch()).count();
+
+    collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+
+    results = nlohmann::json::parse(json_res);
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ(3, results["hits"][0].at("document").size());
+    ASSERT_EQ(1, results["hits"][0].at("document").count("description"));
+
+    req_params = {
+            {"collection", "coll1"},
+            {"q", "*"},
+            {"include_fields", "*, title"}  // Adding a field name overrides include all wildcard
+    };
+
+    collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+
+    results = nlohmann::json::parse(json_res);
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ(1, results["hits"][0].at("document").size());
+    ASSERT_EQ(1, results["hits"][0].at("document").count("title"));
+
     collectionManager.drop_collection("coll1");
 }
 

From ee05ecefa1bb1206fe3a6384e20c1b5bd1cab8b6 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar <happy_san@protonmail.com>
Date: Thu, 14 Sep 2023 12:23:08 +0530
Subject: [PATCH 2/2] Support `as` to specify an alias for reference doc's
 fields. Fix reference `include_fields` returning flattened fields.

---
 include/collection.h          |  5 +++--
 include/field.h               |  5 +++++
 src/collection.cpp            | 34 +++++++++++++++++++++++-----
 src/collection_manager.cpp    | 30 ++++++++++++++++---------
 src/string_utils.cpp          |  8 +++++++
 test/collection_join_test.cpp | 42 ++++++++++++++++++++++++++++++-----
 test/string_utils_test.cpp    |  8 +++++++
 7 files changed, 107 insertions(+), 25 deletions(-)

diff --git a/include/collection.h b/include/collection.h
index 9d2202bc..4f8cd435 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -386,6 +386,7 @@ public:
 
     static Option<bool> add_reference_fields(nlohmann::json& doc,
                                              Collection *const ref_collection,
+                                             const std::string& alias,
                                              const reference_filter_result_t& references,
                                              const tsl::htrie_set<char>& ref_include_fields_full,
                                              const tsl::htrie_set<char>& ref_exclude_fields_full,
@@ -396,7 +397,7 @@ public:
                                   size_t depth = 0,
                                   const std::map<std::string, reference_filter_result_t>& reference_filter_results = {},
                                   Collection *const collection = nullptr, const uint32_t& seq_id = 0,
-                                  const std::vector<std::string>& ref_include_fields_vec = {});
+                                  const std::vector<ref_include_fields>& ref_include_fields_vec = {});
 
     const Index* _get_index() const;
 
@@ -496,7 +497,7 @@ public:
                                   const size_t remote_embedding_num_tries = 2,
                                   const std::string& stopwords_set="",
                                   const std::vector<std::string>& facet_return_parent = {},
-                                  const std::vector<std::string>& ref_include_fields_vec = {}) const;
+                                  const std::vector<ref_include_fields>& ref_include_fields_vec = {}) const;
 
     Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;
 
diff --git a/include/field.h b/include/field.h
index aa54842f..e8fcd1e7 100644
--- a/include/field.h
+++ b/include/field.h
@@ -487,6 +487,11 @@ namespace sort_field_const {
     static const std::string vector_distance = "_vector_distance";
 }
 
+struct ref_include_fields {  // A single reference include (`$Collection(fields)`) together with its optional alias.
+    std::string expression;  // The reference expression without any ` as <alias>` suffix, e.g. `$Products(product_name)`.
+    std::string alias;  // Empty when no alias is given; otherwise the trimmed alias followed by `.`, used as a key prefix.
+};
+
 struct sort_by {
     enum missing_values_t {
         first,
diff --git a/src/collection.cpp b/src/collection.cpp
index 192fd036..7496d680 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1389,7 +1389,7 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
                                   const size_t remote_embedding_num_tries,
                                   const std::string& stopwords_set,
                                   const std::vector<std::string>& facet_return_parent,
-                                  const std::vector<std::string>& ref_include_fields_vec) const {
+                                  const std::vector<ref_include_fields>& ref_include_fields_vec) const {
 
     std::shared_lock lock(mutex);
 
@@ -4314,6 +4314,7 @@ void Collection::remove_flat_fields(nlohmann::json& document) {
 
 Option<bool> Collection::add_reference_fields(nlohmann::json& doc,
                                               Collection *const ref_collection,
+                                              const std::string& alias,
                                               const reference_filter_result_t& references,
                                               const tsl::htrie_set<char>& ref_include_fields_full,
                                               const tsl::htrie_set<char>& ref_exclude_fields_full,
@@ -4328,11 +4329,21 @@ Option<bool> Collection::add_reference_fields(nlohmann::json& doc,
             return Option<bool>(get_doc_op.code(), error_prefix + get_doc_op.error());
         }
 
+        remove_flat_fields(ref_doc);
+
         auto prune_op = prune_doc(ref_doc, ref_include_fields_full, ref_exclude_fields_full);
         if (!prune_op.ok()) {
             return Option<bool>(prune_op.code(), error_prefix + prune_op.error());
         }
 
+        if (!alias.empty()) {
+            auto temp_doc = ref_doc;
+            ref_doc.clear();
+            for (const auto &item: temp_doc.items()) {
+                ref_doc[alias + item.key()] = item.value();
+            }
+        }
+
         doc.update(ref_doc);
         return Option<bool>(true);
     }
@@ -4347,11 +4358,21 @@ Option<bool> Collection::add_reference_fields(nlohmann::json& doc,
             return Option<bool>(get_doc_op.code(), error_prefix + get_doc_op.error());
         }
 
+        remove_flat_fields(ref_doc);
+
         auto prune_op = prune_doc(ref_doc, ref_include_fields_full, ref_exclude_fields_full);
         if (!prune_op.ok()) {
             return Option<bool>(prune_op.code(), error_prefix + prune_op.error());
         }
 
+        if (!alias.empty()) {
+            auto temp_doc = ref_doc;
+            ref_doc.clear();
+            for (const auto &item: temp_doc.items()) {
+                ref_doc[alias + item.key()] = item.value();
+            }
+        }
+
         for (auto ref_doc_it = ref_doc.begin(); ref_doc_it != ref_doc.end(); ref_doc_it++) {
             // Add the values of ref_doc as JSON array into doc.
             doc[ref_doc_it.key()] += ref_doc_it.value();
@@ -4367,7 +4388,7 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
                                    const std::string& parent_name, size_t depth,
                                    const std::map<std::string, reference_filter_result_t>& reference_filter_results,
                                    Collection *const collection, const uint32_t& seq_id,
-                                   const std::vector<std::string>& ref_includes) {
+                                   const std::vector<ref_include_fields>& ref_includes) {
     // doc can only be an object
     auto it = doc.begin();
     while(it != doc.end()) {
@@ -4443,7 +4464,8 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
         it++;
     }
 
-    for (auto const& ref: ref_includes) {
+    for (auto const& ref_include: ref_includes) {
+        auto const& ref = ref_include.expression;
         size_t parenthesis_index = ref.find('(');
 
         auto ref_collection_name = ref.substr(1, parenthesis_index - 1);
@@ -4507,7 +4529,7 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
 
         Option<bool> add_reference_fields_op = Option<bool>(true);
         if (has_filter_reference) {
-            add_reference_fields_op = add_reference_fields(doc, ref_collection.get(),
+            add_reference_fields_op = add_reference_fields(doc, ref_collection.get(), ref_include.alias,
                                                            reference_filter_results.at(ref_collection_name),
                                                            ref_include_fields_full, ref_exclude_fields_full,
                                                            error_prefix);
@@ -4523,7 +4545,7 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
             }
 
             reference_filter_result_t r{1, new uint32[1]{get_reference_doc_id_op.get()}};
-            add_reference_fields_op = add_reference_fields(doc, ref_collection.get(), r,
+            add_reference_fields_op = add_reference_fields(doc, ref_collection.get(), ref_include.alias, r,
                                                            ref_include_fields_full, ref_exclude_fields_full,
                                                            error_prefix);
         } else if (joined_coll_has_reference) {
@@ -4552,7 +4574,7 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
                 r.docs[i] = op.get();
             }
 
-            add_reference_fields_op = add_reference_fields(doc, ref_collection.get(), r,
+            add_reference_fields_op = add_reference_fields(doc, ref_collection.get(), ref_include.alias, r,
                                                            ref_include_fields_full, ref_exclude_fields_full,
                                                            error_prefix);
         }
diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp
index 0c1793ea..f7727688 100644
--- a/src/collection_manager.cpp
+++ b/src/collection_manager.cpp
@@ -9,6 +9,7 @@
 #include "logger.h"
 #include "magic_enum.hpp"
 #include "stopwords_manager.h"
+#include "field.h"
 
 constexpr const size_t CollectionManager::DEFAULT_NUM_MEMORY_SHARDS;
 
@@ -838,43 +839,50 @@ void CollectionManager::_get_reference_collection_names(const std::string& filte
 
 // Separate out the reference includes into `ref_include_fields_vec`.
 void initialize_ref_include_fields_vec(const std::string& filter_query, std::vector<std::string>& include_fields_vec,
-                                       std::vector<std::string>& ref_include_fields_vec) {
+                                       std::vector<ref_include_fields>& ref_include_fields_vec) {
     std::set<std::string> reference_collection_names;
     CollectionManager::_get_reference_collection_names(filter_query, reference_collection_names);
 
     std::vector<std::string> result_include_fields_vec;
     auto wildcard_include_all = true;
-    for (auto include_field: include_fields_vec) {
-        if (include_field[0] != '$') {
-            if (include_field == "*") {
+    for (auto include_field_exp: include_fields_vec) {
+        if (include_field_exp[0] != '$') {
+            if (include_field_exp == "*") {
                 continue;
             }
 
             wildcard_include_all = false;
-            result_include_fields_vec.emplace_back(include_field);
+            result_include_fields_vec.emplace_back(include_field_exp);
             continue;
         }
 
-        auto open_paren_pos = include_field.find('(');
+        auto as_pos = include_field_exp.find(" as ");
+        auto ref_include = include_field_exp.substr(0, as_pos),
+                alias = (as_pos == std::string::npos) ? "" :
+                        include_field_exp.substr(as_pos + 4, include_field_exp.size() - (as_pos + 4));
+
+        // For an alias `foo`, we need to prepend `foo.` to all the top-level keys of the reference doc.
+        ref_include_fields_vec.emplace_back(ref_include_fields{ref_include, alias.empty() ? alias :
+                                                                                StringUtils::trim(alias) + "."});
+
+        auto open_paren_pos = include_field_exp.find('(');
         if (open_paren_pos == std::string::npos) {
             continue;
         }
 
-        auto reference_collection_name = include_field.substr(1, open_paren_pos - 1);
+        auto reference_collection_name = include_field_exp.substr(1, open_paren_pos - 1);
         StringUtils::trim(reference_collection_name);
         if (reference_collection_name.empty()) {
             continue;
         }
 
-        ref_include_fields_vec.emplace_back(include_field);
-
         // Referenced collection in filter_query is already mentioned in ref_include_fields.
         reference_collection_names.erase(reference_collection_name);
     }
 
     // Get all the fields of the referenced collection in the filter but not mentioned in include_fields.
     for (const auto &reference_collection_name: reference_collection_names) {
-        ref_include_fields_vec.emplace_back("$" + reference_collection_name + "(*)");
+        ref_include_fields_vec.emplace_back(ref_include_fields{"$" + reference_collection_name + "(*)", ""});
     }
 
     // Since no field of the collection is mentioned in include_fields, get all the fields.
@@ -1048,7 +1056,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
 
     std::vector<std::string> include_fields_vec;
     std::vector<std::string> exclude_fields_vec;
-    std::vector<std::string> ref_include_fields_vec;
+    std::vector<ref_include_fields> ref_include_fields_vec;
     spp::sparse_hash_set<std::string> include_fields;
     spp::sparse_hash_set<std::string> exclude_fields;
 
diff --git a/src/string_utils.cpp b/src/string_utils.cpp
index 6b320fc2..a9296cd5 100644
--- a/src/string_utils.cpp
+++ b/src/string_utils.cpp
@@ -516,6 +516,14 @@ Option<bool> StringUtils::split_include_fields(const std::string& include_fields
             }
 
             include_field = include_fields.substr(range_pos, (end - range_pos) + 1);
+
+            comma_pos = include_fields.find(',', end);
+            auto as_pos = include_fields.find(" as ", end);
+            if (as_pos != std::string::npos && as_pos < comma_pos) {
+                auto alias = include_fields.substr(as_pos, (comma_pos - as_pos));
+                end += alias.size() + 1;
+                include_field += (" " + trim(alias));
+            }
         } else {
             end = comma_pos;
             include_field = include_fields.substr(start, end - start);
diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index e1a6e2e4..bc8dea8c 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -1511,6 +1511,26 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference) {
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
     ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
 
+    // Add alias using `as`
+    req_params = {
+            {"collection", "Customers"},
+            {"q", "Joe"},
+            {"query_by", "customer_name"},
+            {"filter_by", "product_price:<100"},
+            {"include_fields", "$Products(product_name) as p, product_price"}
+    };
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_TRUE(search_op.ok());
+
+    res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(1, res_obj["found"].get<size_t>());
+    ASSERT_EQ(1, res_obj["hits"].size());
+    ASSERT_EQ(2, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("p.product_name"));
+    ASSERT_EQ("soap", res_obj["hits"][0]["document"].at("p.product_name"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
+    ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
+
     schema_json =
             R"({
                 "name": "Users",
@@ -1649,13 +1669,19 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference) {
                 "name": "Organizations",
                 "fields": [
                     {"name": "org_id", "type": "string"},
-                    {"name": "org_name", "type": "string"}
-                ]
+                    {"name": "name", "type": "object"},
+                    {"name": "name.first", "type": "string"},
+                    {"name": "name.last", "type": "string"}
+                ],
+                "enable_nested_fields": true
             })"_json;
     documents = {
             R"({
                 "org_id": "org_a",
-                "org_name": "Typesense"
+                "name": {
+                    "first": "type",
+                    "last": "sense"
+                }
             })"_json
     };
     collection_create_op = collectionManager.create_collection(schema_json);
@@ -1708,7 +1734,7 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference) {
             {"q", "R"},
             {"query_by", "user_name"},
             {"filter_by", "$Participants(org_id:=org_a) && $Links(repo_id:=repo_b)"},
-            {"include_fields", "user_id, user_name, $Repos(repo_content), $Organizations(org_name)"},
+            {"include_fields", "user_id, user_name, $Repos(repo_content), $Organizations(name) as org"},
             {"exclude_fields", "$Participants(*), $Links(*), "}
     };
     search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
@@ -1718,14 +1744,18 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference) {
     ASSERT_EQ(2, res_obj["found"].get<size_t>());
     ASSERT_EQ(2, res_obj["hits"].size());
     ASSERT_EQ(4, res_obj["hits"][0]["document"].size());
+
     ASSERT_EQ("user_b", res_obj["hits"][0]["document"].at("user_id"));
     ASSERT_EQ("Ruby", res_obj["hits"][0]["document"].at("user_name"));
     ASSERT_EQ("body2", res_obj["hits"][0]["document"].at("repo_content"));
-    ASSERT_EQ("Typesense", res_obj["hits"][0]["document"].at("org_name"));
+    ASSERT_EQ("type", res_obj["hits"][0]["document"]["org.name"].at("first"));
+    ASSERT_EQ("sense", res_obj["hits"][0]["document"]["org.name"].at("last"));
+
     ASSERT_EQ("user_a", res_obj["hits"][1]["document"].at("user_id"));
     ASSERT_EQ("Roshan", res_obj["hits"][1]["document"].at("user_name"));
     ASSERT_EQ("body2", res_obj["hits"][1]["document"].at("repo_content"));
-    ASSERT_EQ("Typesense", res_obj["hits"][1]["document"].at("org_name"));
+    ASSERT_EQ("type", res_obj["hits"][1]["document"]["org.name"].at("first"));  // aliased nested object on the second hit
+    ASSERT_EQ("sense", res_obj["hits"][1]["document"]["org.name"].at("last"));
 }
 
 TEST_F(CollectionJoinTest, CascadeDeletion) {
diff --git a/test/string_utils_test.cpp b/test/string_utils_test.cpp
index 1ee9e6ac..bd33a169 100644
--- a/test/string_utils_test.cpp
+++ b/test/string_utils_test.cpp
@@ -419,4 +419,12 @@ TEST(StringUtilsTest, SplitIncludeFields) {
     include_fields = "id, $Collection(title, pref*), count, ";
     tokens = {"id", "$Collection(title, pref*)", "count"};
     splitIncludeTestHelper(include_fields, tokens);
+
+    include_fields = "$Collection(title, pref*) as coll";
+    tokens = {"$Collection(title, pref*) as coll"};
+    splitIncludeTestHelper(include_fields, tokens);
+
+    include_fields = "id, $Collection(title, pref*)  as coll , count, ";
+    tokens = {"id", "$Collection(title, pref*) as coll", "count"};
+    splitIncludeTestHelper(include_fields, tokens);
 }