From 0e1d70ebf68579f6e5d8bd060f5877a866e32c87 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Sun, 25 Dec 2022 21:04:22 +0530 Subject: [PATCH] Add flag to disable old highlight structure. --- include/collection.h | 3 +- src/collection.cpp | 61 +++++++++++++++++++++----------------- src/collection_manager.cpp | 7 ++++- 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/include/collection.h b/include/collection.h index 75ad74da..a7391cb5 100644 --- a/include/collection.h +++ b/include/collection.h @@ -408,7 +408,8 @@ public: const size_t facet_query_num_typos = 2, const size_t filter_curated_hits_option = 2, const bool prioritize_token_position = false, - const std::string& vector_query_str = "") const; + const std::string& vector_query_str = "", + const bool enable_highlight_v1 = true) const; Option get_filter_ids(const std::string & simple_filter_query, std::vector>& index_ids); diff --git a/src/collection.cpp b/src/collection.cpp index 221a8017..c22327ee 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -865,7 +865,8 @@ Option Collection::search(const std::string & raw_query, const size_t facet_query_num_typos, const size_t filter_curated_hits_option, const bool prioritize_token_position, - const std::string& vector_query_str) const { + const std::string& vector_query_str, + const bool enable_highlight_v1) const { std::shared_lock lock(mutex); @@ -1497,7 +1498,11 @@ Option Collection::search(const std::string & raw_query, } nlohmann::json wrapper_doc; - wrapper_doc["highlights"] = nlohmann::json::array(); + + if(enable_highlight_v1) { + wrapper_doc["highlights"] = nlohmann::json::array(); + } + std::vector highlights; StringUtils string_utils; @@ -1568,34 +1573,36 @@ Option Collection::search(const std::string & raw_query, prune_doc(highlight_res, hfield_names, tsl::htrie_set(), ""); } - std::sort(highlights.begin(), highlights.end()); + if(enable_highlight_v1) { + std::sort(highlights.begin(), highlights.end()); - for(const auto & highlight: highlights) { - auto field_it = search_schema.find(highlight.field); - if(field_it == search_schema.end() || field_it->nested) { - // nested field highlighting will be available only in the new highlight structure. - continue; - } - - nlohmann::json h_json = nlohmann::json::object(); - h_json["field"] = highlight.field; - - if(!highlight.indices.empty()) { - h_json["matched_tokens"] = highlight.matched_tokens; - h_json["indices"] = highlight.indices; - h_json["snippets"] = highlight.snippets; - if(!highlight.values.empty()) { - h_json["values"] = highlight.values; + for(const auto & highlight: highlights) { + auto field_it = search_schema.find(highlight.field); + if(field_it == search_schema.end() || field_it->nested) { + // nested field highlighting will be available only in the new highlight structure. + continue; } - } else { - h_json["matched_tokens"] = highlight.matched_tokens[0]; - h_json["snippet"] = highlight.snippets[0]; - if(!highlight.values.empty() && !highlight.values[0].empty()) { - h_json["value"] = highlight.values[0]; - } - } - wrapper_doc["highlights"].push_back(h_json); + nlohmann::json h_json = nlohmann::json::object(); + h_json["field"] = highlight.field; + + if(!highlight.indices.empty()) { + h_json["matched_tokens"] = highlight.matched_tokens; + h_json["indices"] = highlight.indices; + h_json["snippets"] = highlight.snippets; + if(!highlight.values.empty()) { + h_json["values"] = highlight.values; + } + } else { + h_json["matched_tokens"] = highlight.matched_tokens[0]; + h_json["snippet"] = highlight.snippets[0]; + if(!highlight.values.empty() && !highlight.values[0].empty()) { + h_json["value"] = highlight.values[0]; + } + } + + wrapper_doc["highlights"].push_back(h_json); + } } //wrapper_doc["seq_id"] = (uint32_t) field_order_kv->key; diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp index 922fee38..55b20d1d 100644 --- a/src/collection_manager.cpp +++ b/src/collection_manager.cpp @@ -695,6 +695,8 @@ Option CollectionManager::do_search(std::map& re const char *EXHAUSTIVE_SEARCH = "exhaustive_search"; const char *SPLIT_JOIN_TOKENS = "split_join_tokens"; + const char *ENABLE_HIGHLIGHT_V1 = "enable_highlight_v1"; + // enrich params with values from embedded params for(auto& item: embedded_params.items()) { if(item.key() == "expires_at") { @@ -771,6 +773,7 @@ Option CollectionManager::do_search(std::map& re std::vector infixes; size_t max_extra_prefix = INT16_MAX; size_t max_extra_suffix = INT16_MAX; + bool enable_highlight_v1 = true; std::unordered_map unsigned_int_values = { {MIN_LEN_1TYPO, &min_len_1typo}, @@ -810,6 +813,7 @@ Option CollectionManager::do_search(std::map& re {PRE_SEGMENTED_QUERY, &pre_segmented_query}, {EXHAUSTIVE_SEARCH, &exhaustive_search}, {ENABLE_OVERRIDES, &enable_overrides}, + {ENABLE_HIGHLIGHT_V1, &enable_highlight_v1}, }; std::unordered_map*> str_list_values = { @@ -976,7 +980,8 @@ Option CollectionManager::do_search(std::map& re facet_query_num_typos, filter_curated_hits_option, prioritize_token_position, - vector_query + vector_query, + enable_highlight_v1 ); uint64_t timeMillis = std::chrono::duration_cast(