From 95112a808618e6cbcb560f5e31dad984087b36d3 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishore@kishorelive.com>
Date: Tue, 8 May 2018 07:53:13 +0530
Subject: [PATCH] Support inclusion and exclusion of document fields that are
 returned in the search response.

---
 include/collection.h     |  7 ++++++-
 src/api.cpp              | 22 +++++++++++++++++++-
 src/collection.cpp       | 25 +++++++++++++++++-----
 test/collection_test.cpp | 45 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 92 insertions(+), 7 deletions(-)

diff --git a/include/collection.h b/include/collection.h
index 8f5ac786..f920b8b4 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -97,7 +97,9 @@ public:
                           const std::vector<sort_by> & sort_fields, const int num_typos,
                           const size_t per_page = 10, const size_t page = 1,
                           const token_ordering token_order = FREQUENCY, const bool prefix = false,
-                          const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD);
+                          const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD,
+                          const spp::sparse_hash_set<std::string> include_fields = spp::sparse_hash_set<std::string>(),
+                          const spp::sparse_hash_set<std::string> exclude_fields = spp::sparse_hash_set<std::string>());
 
     Option<nlohmann::json> get(const std::string & id);
 
@@ -105,6 +107,9 @@ public:
 
     Option<uint32_t> index_in_memory(const nlohmann::json & document, uint32_t seq_id);
 
+    static void prune_document(nlohmann::json &document, const spp::sparse_hash_set<std::string> include_fields,
+                               const spp::sparse_hash_set<std::string> exclude_fields);  // Erases from `document` each key in exclude_fields and, when include_fields is non-empty, each key missing from it.
+
     static const int MAX_SEARCH_TOKENS = 10;
     static const int MAX_RESULTS = 500;
 
diff --git a/src/api.cpp b/src/api.cpp
index 2e5d5093..8d9f1e44 100644
--- a/src/api.cpp
+++ b/src/api.cpp
@@ -171,6 +171,8 @@ void get_search(http_req & req, http_res & res) {
     const char *PAGE = "page";
     const char *CALLBACK = "callback";
     const char *RANK_TOKENS_BY = "rank_tokens_by";
+    const char *INCLUDE_FIELDS = "include_fields";
+    const char *EXCLUDE_FIELDS = "exclude_fields";
 
     if(req.params.count(NUM_TYPOS) == 0) {
         req.params[NUM_TYPOS] = "2";
@@ -200,6 +202,14 @@ void get_search(http_req & req, http_res & res) {
         req.params[PAGE] = "1";
     }
 
+    if(req.params.count(INCLUDE_FIELDS) == 0) {
+        req.params[INCLUDE_FIELDS] = "";
+    }
+
+    if(req.params.count(EXCLUDE_FIELDS) == 0) {
+        req.params[EXCLUDE_FIELDS] = "";
+    }
+
     if(!StringUtils::is_uint64_t(req.params[DROP_TOKENS_THRESHOLD])) {
         return res.send_400("Parameter `" + std::string(DROP_TOKENS_THRESHOLD) + "` must be an unsigned integer.");
     }
@@ -224,6 +234,15 @@ void get_search(http_req & req, http_res & res) {
     std::vector<std::string> facet_fields;
     StringUtils::split(req.params[FACET_BY], facet_fields, ",");
 
+    std::vector<std::string> include_fields_vec;
+    StringUtils::split(req.params[INCLUDE_FIELDS], include_fields_vec, ",");
+
+    std::vector<std::string> exclude_fields_vec;
+    StringUtils::split(req.params[EXCLUDE_FIELDS], exclude_fields_vec, ",");
+
+    spp::sparse_hash_set<std::string> include_fields(include_fields_vec.begin(), include_fields_vec.end());
+    spp::sparse_hash_set<std::string> exclude_fields(exclude_fields_vec.begin(), exclude_fields_vec.end());
+
     std::vector<sort_by> sort_fields;
     if(req.params.count(SORT_BY) != 0) {
         std::vector<std::string> sort_field_strs;
@@ -266,7 +285,8 @@ void get_search(http_req & req, http_res & res) {
     Option<nlohmann::json> result_op = collection->search(req.params[QUERY], search_fields, filter_str, facet_fields,
                                                sort_fields, std::stoi(req.params[NUM_TYPOS]),
                                                std::stoi(req.params[PER_PAGE]), std::stoi(req.params[PAGE]),
-                                               token_order, prefix, drop_tokens_threshold);
+                                               token_order, prefix, drop_tokens_threshold,
+                                               include_fields, exclude_fields);
 
     uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
                                std::chrono::high_resolution_clock::now() - begin).count();
diff --git a/src/collection.cpp b/src/collection.cpp
index fde6be63..391e8a83 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -261,12 +261,26 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
     return Option<>(200);
 }
 
+void Collection::prune_document(nlohmann::json &document, const spp::sparse_hash_set<std::string> include_fields,  // NOTE(review): both sets are taken by value, so each call copies them
+                                const spp::sparse_hash_set<std::string> exclude_fields) {  // Prunes `document` in place; exclusion wins over inclusion, and an empty include set means "keep everything not excluded".
+    auto it = document.begin();
+    for(; it != document.end(); ) {  // no increment clause: the body advances `it` itself because it may erase
+        if(exclude_fields.count(it.key()) != 0 || (include_fields.size() != 0 && include_fields.count(it.key()) == 0)) {
+            it = document.erase(it);  // key is excluded, or an include list exists and omits it; resume from the iterator erase() returns
+        } else {
+            ++it;  // key survives pruning
+        }
+    }
+}
+
 Option<nlohmann::json> Collection::search(std::string query, const std::vector<std::string> search_fields,
                                   const std::string & simple_filter_query, const std::vector<std::string> & facet_fields,
                                   const std::vector<sort_by> & sort_fields, const int num_typos,
                                   const size_t per_page, const size_t page,
                                   const token_ordering token_order, const bool prefix,
-                                  const size_t drop_tokens_threshold) {
+                                  const size_t drop_tokens_threshold,
+                                  const spp::sparse_hash_set<std::string> include_fields,
+                                  const spp::sparse_hash_set<std::string> exclude_fields) {
     std::vector<facet> facets;
 
     // validate search fields
@@ -535,10 +549,6 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
             return Option<nlohmann::json>(500, "Error while parsing stored document.");
         }
 
-        wrapper_doc["document"] = document;
-        //wrapper_doc["match_score"] = field_order_kv.match_score;
-        //wrapper_doc["seq_id"] = (uint32_t) field_order_kv.key;
-
         // highlight query words in the result
         const std::string & field_name = search_fields[Index::FIELD_LIMIT_NUM - field_order_kv.field_id];
         field search_field = search_schema.at(field_name);
@@ -630,6 +640,11 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
                 delete [] it->second;
                 it->second = nullptr;
             }
+
+            prune_document(document, include_fields, exclude_fields);
+            wrapper_doc["document"] = document;
+            //wrapper_doc["match_score"] = field_order_kv.match_score;
+            //wrapper_doc["seq_id"] = (uint32_t) field_order_kv.key;
         }
 
         result["hits"].push_back(wrapper_doc);
diff --git a/test/collection_test.cpp b/test/collection_test.cpp
index e25b2b58..eb1ec247 100644
--- a/test/collection_test.cpp
+++ b/test/collection_test.cpp
@@ -1773,4 +1773,49 @@ TEST_F(CollectionTest, DeletionOfADocument) {
     ASSERT_EQ(3, num_keys);
 
     collectionManager.drop_collection("collection_for_del");
+}
+
+nlohmann::json get_prune_doc() {  // Test fixture: builds a fresh document with keys "one".."four" holding 1..4.
+    nlohmann::json document;
+    document["one"] = 1;
+    document["two"] = 2;
+    document["three"] = 3;
+    document["four"] = 4;
+
+    return document;
+}
+
+TEST_F(CollectionTest, PruneFieldsFromDocument) {  // Exercises Collection::prune_document with each include/exclude combination.
+    nlohmann::json document = get_prune_doc();
+    Collection::prune_document(document, {"one", "two"}, spp::sparse_hash_set<std::string>());  // include list only: just "one" and "two" remain
+    ASSERT_EQ(2, document.size());
+    ASSERT_EQ(1, document["one"]);
+    ASSERT_EQ(2, document["two"]);
+
+    // exclusion takes precedence: "one" is in both sets, so it is removed and nothing else is included
+    document = get_prune_doc();
+    Collection::prune_document(document, {"one"}, {"one"});
+    ASSERT_EQ(0, document.size());
+
+    // when no inclusion list is specified, every field not named in the exclusion list is kept
+    document = get_prune_doc();
+    Collection::prune_document(document, spp::sparse_hash_set<std::string>(), {"three"});
+    ASSERT_EQ(3, document.size());
+    ASSERT_EQ(1, document["one"]);
+    ASSERT_EQ(2, document["two"]);
+    ASSERT_EQ(4, document["four"]);
+
+    document = get_prune_doc();
+    Collection::prune_document(document, spp::sparse_hash_set<std::string>(), spp::sparse_hash_set<std::string>());  // both sets empty: nothing is removed
+    ASSERT_EQ(4, document.size());
+
+    // when the only included field does not exist, every field is removed
+    document = get_prune_doc();
+    Collection::prune_document(document, {"notfound"}, spp::sparse_hash_set<std::string>());
+    ASSERT_EQ(0, document.size());
+
+    // when the excluded field does not exist, the document is left unchanged
+    document = get_prune_doc();
+    Collection::prune_document(document, spp::sparse_hash_set<std::string>(), {"notfound"});
+    ASSERT_EQ(4, document.size());
 }
\ No newline at end of file