From 688bd218161f90b00ecfb8ec5c83d98e799421c0 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Tue, 13 Jun 2017 14:53:28 -0500 Subject: [PATCH] Show query snippet as a separate field instead of modifying the field value directly. --- src/collection.cpp | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/collection.cpp b/src/collection.cpp index 8b34e67d..f75d6412 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -656,25 +656,42 @@ nlohmann::json Collection::search(std::string query, const std::vector tokens; StringUtils::split(document[field_name], tokens, " "); - for(size_t i = 1; i <= field_order_kv.second.offset_diffs[0]; i++) { + std::vector token_indices; + char num_tokens_found = field_order_kv.second.offset_diffs[0]; + for(size_t i = 1; i <= num_tokens_found; i++) { size_t token_index = (size_t)(field_order_kv.second.start_offset + field_order_kv.second.offset_diffs[i]); - tokens[token_index] = "" + tokens[token_index] + ""; + token_indices.push_back(token_index); } - std::stringstream ss; + auto minmax = std::minmax_element(token_indices.begin(), token_indices.end()); - for(size_t token_index = 0; token_index < tokens.size(); ++token_index) { - if(token_index != 0) { - ss << " "; + // pick surrounding tokens within N tokens of min_index and max_index for the snippet + const size_t start_index = std::max(0, (int)(*(minmax.first)-5)); + const size_t end_index = std::min((int)tokens.size(), (int)(*(minmax.second)+5)); + + std::stringstream snippet_stream; + size_t token_index = 0; + + for(size_t snippet_index = start_index; snippet_index < end_index; snippet_index++) { + if(snippet_index != 0) { + snippet_stream << " "; + } + + if(snippet_index == token_indices[token_index]) { + token_index++; + snippet_stream << "" + tokens[snippet_index] + ""; + } else { + snippet_stream << tokens[snippet_index]; } - ss << tokens[token_index]; } - document[field_name] = ss.str(); + document["_snippets"] = nlohmann::json::object(); + document["_snippets"][field_name] = snippet_stream.str(); } result["hits"].push_back(document);