Support inclusion and exclusion of document fields that are returned in the search response.

This commit is contained in:
Kishore Nallan 2018-05-08 07:53:13 +05:30
parent 3cdeff7814
commit 95112a8086
4 changed files with 92 additions and 7 deletions

View File

@@ -97,7 +97,9 @@ public:
const std::vector<sort_by> & sort_fields, const int num_typos,
const size_t per_page = 10, const size_t page = 1,
const token_ordering token_order = FREQUENCY, const bool prefix = false,
const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD);
const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD,
const spp::sparse_hash_set<std::string> include_fields = spp::sparse_hash_set<std::string>(),
const spp::sparse_hash_set<std::string> exclude_fields = spp::sparse_hash_set<std::string>());
Option<nlohmann::json> get(const std::string & id);
@@ -105,6 +107,9 @@ public:
Option<uint32_t> index_in_memory(const nlohmann::json & document, uint32_t seq_id);
static void prune_document(nlohmann::json &document, const spp::sparse_hash_set<std::string> include_fields,
const spp::sparse_hash_set<std::string> exclude_fields);
static const int MAX_SEARCH_TOKENS = 10;
static const int MAX_RESULTS = 500;

View File

@@ -171,6 +171,8 @@ void get_search(http_req & req, http_res & res) {
const char *PAGE = "page";
const char *CALLBACK = "callback";
const char *RANK_TOKENS_BY = "rank_tokens_by";
const char *INCLUDE_FIELDS = "include_fields";
const char *EXCLUDE_FIELDS = "exclude_fields";
if(req.params.count(NUM_TYPOS) == 0) {
req.params[NUM_TYPOS] = "2";
@@ -200,6 +202,14 @@ void get_search(http_req & req, http_res & res) {
req.params[PAGE] = "1";
}
if(req.params.count(INCLUDE_FIELDS) == 0) {
req.params[INCLUDE_FIELDS] = "";
}
if(req.params.count(EXCLUDE_FIELDS) == 0) {
req.params[EXCLUDE_FIELDS] = "";
}
if(!StringUtils::is_uint64_t(req.params[DROP_TOKENS_THRESHOLD])) {
return res.send_400("Parameter `" + std::string(DROP_TOKENS_THRESHOLD) + "` must be an unsigned integer.");
}
@@ -224,6 +234,15 @@ void get_search(http_req & req, http_res & res) {
std::vector<std::string> facet_fields;
StringUtils::split(req.params[FACET_BY], facet_fields, ",");
std::vector<std::string> include_fields_vec;
StringUtils::split(req.params[INCLUDE_FIELDS], include_fields_vec, ",");
std::vector<std::string> exclude_fields_vec;
StringUtils::split(req.params[EXCLUDE_FIELDS], exclude_fields_vec, ",");
spp::sparse_hash_set<std::string> include_fields(include_fields_vec.begin(), include_fields_vec.end());
spp::sparse_hash_set<std::string> exclude_fields(exclude_fields_vec.begin(), exclude_fields_vec.end());
std::vector<sort_by> sort_fields;
if(req.params.count(SORT_BY) != 0) {
std::vector<std::string> sort_field_strs;
@@ -266,7 +285,8 @@ void get_search(http_req & req, http_res & res) {
Option<nlohmann::json> result_op = collection->search(req.params[QUERY], search_fields, filter_str, facet_fields,
sort_fields, std::stoi(req.params[NUM_TYPOS]),
std::stoi(req.params[PER_PAGE]), std::stoi(req.params[PAGE]),
token_order, prefix, drop_tokens_threshold);
token_order, prefix, drop_tokens_threshold,
include_fields, exclude_fields);
uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - begin).count();

View File

@@ -261,12 +261,26 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
return Option<>(200);
}
void Collection::prune_document(nlohmann::json &document, const spp::sparse_hash_set<std::string> include_fields,
                                const spp::sparse_hash_set<std::string> exclude_fields) {
    // Removes fields from `document` in place. A field is dropped when it is
    // explicitly excluded, or when a non-empty inclusion list is given and the
    // field is not on it. An empty `include_fields` means "include everything",
    // so exclusion always takes precedence over inclusion.
    for(auto it = document.begin(); it != document.end(); ) {
        const bool is_excluded = (exclude_fields.count(it.key()) != 0);
        const bool fails_inclusion = (include_fields.size() != 0 && include_fields.count(it.key()) == 0);

        if(is_excluded || fails_inclusion) {
            it = document.erase(it);   // erase() hands back the next valid iterator
        } else {
            ++it;
        }
    }
}
Option<nlohmann::json> Collection::search(std::string query, const std::vector<std::string> search_fields,
const std::string & simple_filter_query, const std::vector<std::string> & facet_fields,
const std::vector<sort_by> & sort_fields, const int num_typos,
const size_t per_page, const size_t page,
const token_ordering token_order, const bool prefix,
const size_t drop_tokens_threshold) {
const size_t drop_tokens_threshold,
const spp::sparse_hash_set<std::string> include_fields,
const spp::sparse_hash_set<std::string> exclude_fields) {
std::vector<facet> facets;
// validate search fields
@@ -535,10 +549,6 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
return Option<nlohmann::json>(500, "Error while parsing stored document.");
}
wrapper_doc["document"] = document;
//wrapper_doc["match_score"] = field_order_kv.match_score;
//wrapper_doc["seq_id"] = (uint32_t) field_order_kv.key;
// highlight query words in the result
const std::string & field_name = search_fields[Index::FIELD_LIMIT_NUM - field_order_kv.field_id];
field search_field = search_schema.at(field_name);
@@ -630,6 +640,11 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
delete [] it->second;
it->second = nullptr;
}
prune_document(document, include_fields, exclude_fields);
wrapper_doc["document"] = document;
//wrapper_doc["match_score"] = field_order_kv.match_score;
//wrapper_doc["seq_id"] = (uint32_t) field_order_kv.key;
}
result["hits"].push_back(wrapper_doc);

View File

@@ -1773,4 +1773,49 @@ TEST_F(CollectionTest, DeletionOfADocument) {
ASSERT_EQ(3, num_keys);
collectionManager.drop_collection("collection_for_del");
}
// Builds the flat four-field fixture document used by the pruning tests.
nlohmann::json get_prune_doc() {
    nlohmann::json doc = {
        {"one", 1},
        {"two", 2},
        {"three", 3},
        {"four", 4}
    };
    return doc;
}
TEST_F(CollectionTest, PruneFieldsFromDocument) {
    // Shared empty set: "no inclusion filter" / "no exclusion filter".
    const spp::sparse_hash_set<std::string> no_fields;

    // only the included fields survive
    nlohmann::json document = get_prune_doc();
    Collection::prune_document(document, {"one", "two"}, no_fields);
    ASSERT_EQ(2, document.size());
    ASSERT_EQ(1, document["one"]);
    ASSERT_EQ(2, document["two"]);

    // exclude takes precedence over include
    document = get_prune_doc();
    Collection::prune_document(document, {"one"}, {"one"});
    ASSERT_EQ(0, document.size());

    // with no inclusion list, everything except the excluded fields is kept
    document = get_prune_doc();
    Collection::prune_document(document, no_fields, {"three"});
    ASSERT_EQ(3, document.size());
    ASSERT_EQ(1, document["one"]);
    ASSERT_EQ(2, document["two"]);
    ASSERT_EQ(4, document["four"]);

    // neither list supplied: document is left untouched
    document = get_prune_doc();
    Collection::prune_document(document, no_fields, no_fields);
    ASSERT_EQ(4, document.size());

    // including only a non-existent field prunes everything
    document = get_prune_doc();
    Collection::prune_document(document, {"notfound"}, no_fields);
    ASSERT_EQ(0, document.size());

    // excluding a non-existent field is a no-op
    document = get_prune_doc();
    Collection::prune_document(document, no_fields, {"notfound"});
    ASSERT_EQ(4, document.size());
}