Support inclusion and exclusion of document fields that are returned in the search response.

This commit is contained in:
Kishore Nallan 2018-05-08 07:53:13 +05:30
parent 3cdeff7814
commit 95112a8086
4 changed files with 92 additions and 7 deletions

View File

@@ -97,7 +97,9 @@ public:
const std::vector<sort_by> & sort_fields, const int num_typos,
const size_t per_page = 10, const size_t page = 1,
const token_ordering token_order = FREQUENCY, const bool prefix = false,
const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD);
const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD,
const spp::sparse_hash_set<std::string> include_fields = spp::sparse_hash_set<std::string>(),
const spp::sparse_hash_set<std::string> exclude_fields = spp::sparse_hash_set<std::string>());
Option<nlohmann::json> get(const std::string & id);
@@ -105,6 +107,9 @@ public:
Option<uint32_t> index_in_memory(const nlohmann::json & document, uint32_t seq_id);
static void prune_document(nlohmann::json &document, const spp::sparse_hash_set<std::string> include_fields,
const spp::sparse_hash_set<std::string> exclude_fields);
static const int MAX_SEARCH_TOKENS = 10;
static const int MAX_RESULTS = 500;

View File

@@ -171,6 +171,8 @@ void get_search(http_req & req, http_res & res) {
const char *PAGE = "page";
const char *CALLBACK = "callback";
const char *RANK_TOKENS_BY = "rank_tokens_by";
const char *INCLUDE_FIELDS = "include_fields";
const char *EXCLUDE_FIELDS = "exclude_fields";
if(req.params.count(NUM_TYPOS) == 0) {
req.params[NUM_TYPOS] = "2";
@@ -200,6 +202,14 @@ void get_search(http_req & req, http_res & res) {
req.params[PAGE] = "1";
}
if(req.params.count(INCLUDE_FIELDS) == 0) {
req.params[INCLUDE_FIELDS] = "";
}
if(req.params.count(EXCLUDE_FIELDS) == 0) {
req.params[EXCLUDE_FIELDS] = "";
}
if(!StringUtils::is_uint64_t(req.params[DROP_TOKENS_THRESHOLD])) {
return res.send_400("Parameter `" + std::string(DROP_TOKENS_THRESHOLD) + "` must be an unsigned integer.");
}
@@ -224,6 +234,15 @@ void get_search(http_req & req, http_res & res) {
std::vector<std::string> facet_fields;
StringUtils::split(req.params[FACET_BY], facet_fields, ",");
std::vector<std::string> include_fields_vec;
StringUtils::split(req.params[INCLUDE_FIELDS], include_fields_vec, ",");
std::vector<std::string> exclude_fields_vec;
StringUtils::split(req.params[EXCLUDE_FIELDS], exclude_fields_vec, ",");
spp::sparse_hash_set<std::string> include_fields(include_fields_vec.begin(), include_fields_vec.end());
spp::sparse_hash_set<std::string> exclude_fields(exclude_fields_vec.begin(), exclude_fields_vec.end());
std::vector<sort_by> sort_fields;
if(req.params.count(SORT_BY) != 0) {
std::vector<std::string> sort_field_strs;
@@ -266,7 +285,8 @@ void get_search(http_req & req, http_res & res) {
Option<nlohmann::json> result_op = collection->search(req.params[QUERY], search_fields, filter_str, facet_fields,
sort_fields, std::stoi(req.params[NUM_TYPOS]),
std::stoi(req.params[PER_PAGE]), std::stoi(req.params[PAGE]),
token_order, prefix, drop_tokens_threshold);
token_order, prefix, drop_tokens_threshold,
include_fields, exclude_fields);
uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - begin).count();

View File

@@ -261,12 +261,26 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
return Option<>(200);
}
void Collection::prune_document(nlohmann::json &document, const spp::sparse_hash_set<std::string> include_fields,
                                const spp::sparse_hash_set<std::string> exclude_fields) {
    // Removes fields from `document` in place. A field is dropped when it is
    // explicitly excluded, or when a non-empty inclusion list is given and the
    // field is not on it. An empty `include_fields` means "include everything",
    // so exclusion always takes precedence over inclusion.
    for(auto it = document.begin(); it != document.end(); ) {
        const bool is_excluded = (exclude_fields.count(it.key()) != 0);
        const bool fails_inclusion = (include_fields.size() != 0 && include_fields.count(it.key()) == 0);

        if(is_excluded || fails_inclusion) {
            it = document.erase(it);   // erase() hands back the next valid iterator
        } else {
            ++it;
        }
    }
}
Option<nlohmann::json> Collection::search(std::string query, const std::vector<std::string> search_fields,
const std::string & simple_filter_query, const std::vector<std::string> & facet_fields,
const std::vector<sort_by> & sort_fields, const int num_typos,
const size_t per_page, const size_t page,
const token_ordering token_order, const bool prefix,
const size_t drop_tokens_threshold) {
const size_t drop_tokens_threshold,
const spp::sparse_hash_set<std::string> include_fields,
const spp::sparse_hash_set<std::string> exclude_fields) {
std::vector<facet> facets;
// validate search fields
@@ -535,10 +549,6 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
return Option<nlohmann::json>(500, "Error while parsing stored document.");
}
wrapper_doc["document"] = document;
//wrapper_doc["match_score"] = field_order_kv.match_score;
//wrapper_doc["seq_id"] = (uint32_t) field_order_kv.key;
// highlight query words in the result
const std::string & field_name = search_fields[Index::FIELD_LIMIT_NUM - field_order_kv.field_id];
field search_field = search_schema.at(field_name);
@@ -630,6 +640,11 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
delete [] it->second;
it->second = nullptr;
}
prune_document(document, include_fields, exclude_fields);
wrapper_doc["document"] = document;
//wrapper_doc["match_score"] = field_order_kv.match_score;
//wrapper_doc["seq_id"] = (uint32_t) field_order_kv.key;
}
result["hits"].push_back(wrapper_doc);

View File

@@ -1773,4 +1773,49 @@ TEST_F(CollectionTest, DeletionOfADocument) {
ASSERT_EQ(3, num_keys);
collectionManager.drop_collection("collection_for_del");
}
// Builds the flat four-field fixture document used by the pruning tests.
nlohmann::json get_prune_doc() {
    nlohmann::json doc = {
        {"one", 1},
        {"two", 2},
        {"three", 3},
        {"four", 4}
    };
    return doc;
}
TEST_F(CollectionTest, PruneFieldsFromDocument) {
    // Shared empty set: "no inclusion filter" / "no exclusion filter".
    const spp::sparse_hash_set<std::string> no_fields;

    // only the included fields survive
    nlohmann::json document = get_prune_doc();
    Collection::prune_document(document, {"one", "two"}, no_fields);
    ASSERT_EQ(2, document.size());
    ASSERT_EQ(1, document["one"]);
    ASSERT_EQ(2, document["two"]);

    // exclude takes precedence over include
    document = get_prune_doc();
    Collection::prune_document(document, {"one"}, {"one"});
    ASSERT_EQ(0, document.size());

    // with no inclusion list, everything except the excluded fields is kept
    document = get_prune_doc();
    Collection::prune_document(document, no_fields, {"three"});
    ASSERT_EQ(3, document.size());
    ASSERT_EQ(1, document["one"]);
    ASSERT_EQ(2, document["two"]);
    ASSERT_EQ(4, document["four"]);

    // neither list supplied: document is left untouched
    document = get_prune_doc();
    Collection::prune_document(document, no_fields, no_fields);
    ASSERT_EQ(4, document.size());

    // including only a non-existent field prunes everything
    document = get_prune_doc();
    Collection::prune_document(document, {"notfound"}, no_fields);
    ASSERT_EQ(0, document.size());

    // excluding a non-existent field is a no-op
    document = get_prune_doc();
    Collection::prune_document(document, no_fields, {"notfound"});
    ASSERT_EQ(4, document.size());
}