From 95112a808618e6cbcb560f5e31dad984087b36d3 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Tue, 8 May 2018 07:53:13 +0530 Subject: [PATCH] Support inclusion and exclusion of document fields that are returned in search response. --- include/collection.h | 7 ++++++- src/api.cpp | 22 +++++++++++++++++++- src/collection.cpp | 25 +++++++++++++++++----- test/collection_test.cpp | 45 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 7 deletions(-) diff --git a/include/collection.h b/include/collection.h index 8f5ac786..f920b8b4 100644 --- a/include/collection.h +++ b/include/collection.h @@ -97,7 +97,9 @@ public: const std::vector & sort_fields, const int num_typos, const size_t per_page = 10, const size_t page = 1, const token_ordering token_order = FREQUENCY, const bool prefix = false, - const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD); + const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD, + const spp::sparse_hash_set include_fields = spp::sparse_hash_set(), + const spp::sparse_hash_set exclude_fields = spp::sparse_hash_set()); Option get(const std::string & id); @@ -105,6 +107,9 @@ public: Option index_in_memory(const nlohmann::json & document, uint32_t seq_id); + static void prune_document(nlohmann::json &document, const spp::sparse_hash_set include_fields, + const spp::sparse_hash_set exclude_fields); + static const int MAX_SEARCH_TOKENS = 10; static const int MAX_RESULTS = 500; diff --git a/src/api.cpp b/src/api.cpp index 2e5d5093..8d9f1e44 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -171,6 +171,8 @@ void get_search(http_req & req, http_res & res) { const char *PAGE = "page"; const char *CALLBACK = "callback"; const char *RANK_TOKENS_BY = "rank_tokens_by"; + const char *INCLUDE_FIELDS = "include_fields"; + const char *EXCLUDE_FIELDS = "exclude_fields"; if(req.params.count(NUM_TYPOS) == 0) { req.params[NUM_TYPOS] = "2"; @@ -200,6 +202,14 @@ void get_search(http_req & req, http_res & res) { req.params[PAGE] = "1"; } + if(req.params.count(INCLUDE_FIELDS) == 0) { + req.params[INCLUDE_FIELDS] = ""; + } + + if(req.params.count(EXCLUDE_FIELDS) == 0) { + req.params[EXCLUDE_FIELDS] = ""; + } + if(!StringUtils::is_uint64_t(req.params[DROP_TOKENS_THRESHOLD])) { return res.send_400("Parameter `" + std::string(DROP_TOKENS_THRESHOLD) + "` must be an unsigned integer."); } @@ -224,6 +234,15 @@ void get_search(http_req & req, http_res & res) { std::vector facet_fields; StringUtils::split(req.params[FACET_BY], facet_fields, ","); + std::vector include_fields_vec; + StringUtils::split(req.params[INCLUDE_FIELDS], include_fields_vec, ","); + + std::vector exclude_fields_vec; + StringUtils::split(req.params[EXCLUDE_FIELDS], exclude_fields_vec, ","); + + spp::sparse_hash_set include_fields(include_fields_vec.begin(), include_fields_vec.end()); + spp::sparse_hash_set exclude_fields(exclude_fields_vec.begin(), exclude_fields_vec.end()); + std::vector sort_fields; if(req.params.count(SORT_BY) != 0) { std::vector sort_field_strs; @@ -266,7 +285,8 @@ void get_search(http_req & req, http_res & res) { Option result_op = collection->search(req.params[QUERY], search_fields, filter_str, facet_fields, sort_fields, std::stoi(req.params[NUM_TYPOS]), std::stoi(req.params[PER_PAGE]), std::stoi(req.params[PAGE]), - token_order, prefix, drop_tokens_threshold); + token_order, prefix, drop_tokens_threshold, + include_fields, exclude_fields); uint64_t timeMillis = std::chrono::duration_cast( std::chrono::high_resolution_clock::now() - begin).count(); diff --git a/src/collection.cpp b/src/collection.cpp index fde6be63..391e8a83 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -261,12 +261,26 @@ Option Collection::index_in_memory(const nlohmann::json &document, uin return Option<>(200); } +void Collection::prune_document(nlohmann::json &document, const spp::sparse_hash_set include_fields, + const spp::sparse_hash_set exclude_fields) { + auto it = document.begin(); + for(; it != document.end(); ) { + if(exclude_fields.count(it.key()) != 0 || (include_fields.size() != 0 && include_fields.count(it.key()) == 0)) { + it = document.erase(it); + } else { + ++it; + } + } +} + Option Collection::search(std::string query, const std::vector search_fields, const std::string & simple_filter_query, const std::vector & facet_fields, const std::vector & sort_fields, const int num_typos, const size_t per_page, const size_t page, const token_ordering token_order, const bool prefix, - const size_t drop_tokens_threshold) { + const size_t drop_tokens_threshold, + const spp::sparse_hash_set include_fields, + const spp::sparse_hash_set exclude_fields) { std::vector facets; // validate search fields @@ -535,10 +549,6 @@ Option Collection::search(std::string query, const std::vector(500, "Error while parsing stored document."); } - wrapper_doc["document"] = document; - //wrapper_doc["match_score"] = field_order_kv.match_score; - //wrapper_doc["seq_id"] = (uint32_t) field_order_kv.key; - // highlight query words in the result const std::string & field_name = search_fields[Index::FIELD_LIMIT_NUM - field_order_kv.field_id]; field search_field = search_schema.at(field_name); @@ -630,6 +640,11 @@ Option Collection::search(std::string query, const std::vectorsecond; it->second = nullptr; } + + prune_document(document, include_fields, exclude_fields); + wrapper_doc["document"] = document; + //wrapper_doc["match_score"] = field_order_kv.match_score; + //wrapper_doc["seq_id"] = (uint32_t) field_order_kv.key; } result["hits"].push_back(wrapper_doc); diff --git a/test/collection_test.cpp b/test/collection_test.cpp index e25b2b58..eb1ec247 100644 --- a/test/collection_test.cpp +++ b/test/collection_test.cpp @@ -1773,4 +1773,49 @@ TEST_F(CollectionTest, DeletionOfADocument) { ASSERT_EQ(3, num_keys); collectionManager.drop_collection("collection_for_del"); +} + +nlohmann::json get_prune_doc() { + nlohmann::json document; + document["one"] = 1; + document["two"] = 2; + document["three"] = 3; + document["four"] = 4; + + return document; +} + +TEST_F(CollectionTest, PruneFieldsFromDocument) { + nlohmann::json document = get_prune_doc(); + Collection::prune_document(document, {"one", "two"}, spp::sparse_hash_set()); + ASSERT_EQ(2, document.size()); + ASSERT_EQ(1, document["one"]); + ASSERT_EQ(2, document["two"]); + + // exclude takes precedence + document = get_prune_doc(); + Collection::prune_document(document, {"one"}, {"one"}); + ASSERT_EQ(0, document.size()); + + // when no inclusion is specified, should return all fields not mentioned by exclusion list + document = get_prune_doc(); + Collection::prune_document(document, spp::sparse_hash_set(), {"three"}); + ASSERT_EQ(3, document.size()); + ASSERT_EQ(1, document["one"]); + ASSERT_EQ(2, document["two"]); + ASSERT_EQ(4, document["four"]); + + document = get_prune_doc(); + Collection::prune_document(document, spp::sparse_hash_set(), spp::sparse_hash_set()); + ASSERT_EQ(4, document.size()); + + // when included field does not exist + document = get_prune_doc(); + Collection::prune_document(document, {"notfound"}, spp::sparse_hash_set()); + ASSERT_EQ(0, document.size()); + + // when excluded field does not exist + document = get_prune_doc(); + Collection::prune_document(document, spp::sparse_hash_set(), {"notfound"}); + ASSERT_EQ(4, document.size()); } \ No newline at end of file