diff --git a/include/collection.h b/include/collection.h index 76f278b5..22b12851 100644 --- a/include/collection.h +++ b/include/collection.h @@ -534,7 +534,8 @@ public: size_t limit_hits=UINT32_MAX, bool prioritize_exact_match=true, bool pre_segmented_query=false, - bool enable_overrides=true) const; + bool enable_overrides=true, + const std::string& highlight_fields="") const; Option get_filter_ids(const std::string & simple_filter_query, std::vector>& index_ids); diff --git a/src/collection.cpp b/src/collection.cpp index a69f6ce4..4414b39d 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -497,7 +497,7 @@ Option Collection::search(const std::string & query, const std:: const std::string & simple_facet_query, const size_t snippet_threshold, const size_t highlight_affix_num_tokens, - const std::string & highlight_full_fields, + const std::string& highlight_full_fields, size_t typo_tokens_threshold, const std::string& pinned_hits_str, const std::string& hidden_hits_str, @@ -509,7 +509,8 @@ Option Collection::search(const std::string & query, const std:: size_t limit_hits, bool prioritize_exact_match, bool pre_segmented_query, - bool enable_overrides) const { + bool enable_overrides, + const std::string& highlight_fields) const { std::shared_lock lock(mutex); @@ -1089,18 +1090,35 @@ Option Collection::search(const std::string & query, const std:: spp::sparse_hash_set fields_highlighted_fully; StringUtils::split(highlight_full_fields, fields_highlighted_fully_vec, ","); + std::vector fields_highlighted_vec; + std::vector fields_highlighted_indices; + if(highlight_fields.empty()) { + for(size_t i = 0; i < search_fields.size(); i++) { + const auto& field_name = search_fields[i]; + // should not pick excluded field for highlighting + if(exclude_fields.count(field_name) > 0) { + continue; + } + + fields_highlighted_vec.emplace_back(field_name); + fields_highlighted_indices.push_back(i); + } + } else { + if(query != "*") { + StringUtils::split(highlight_fields, fields_highlighted_vec, ","); + for(size_t i = 0; i < fields_highlighted_vec.size(); i++) { + fields_highlighted_indices.push_back(0); + } + } + } + for(std::string & highlight_full_field: fields_highlighted_fully_vec) { fields_highlighted_fully.emplace(highlight_full_field); } - for(size_t i = 0; i < search_fields.size(); i++) { - const std::string& field_name = search_fields[i]; - const std::vector& q_tokens = field_query_tokens[i].q_include_tokens; - - // should not pick excluded field for highlighting - if(exclude_fields.count(field_name) > 0) { - continue; - } + for(size_t i = 0; i < fields_highlighted_vec.size(); i++) { + const std::string& field_name = fields_highlighted_vec[i]; + const std::vector& q_tokens = field_query_tokens[fields_highlighted_indices[i]].q_include_tokens; field search_field = search_schema.at(field_name); if(query != "*" && (search_field.type == field_types::STRING || diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp index 217c46a6..16f4d2fd 100644 --- a/src/collection_manager.cpp +++ b/src/collection_manager.cpp @@ -495,6 +495,7 @@ Option CollectionManager::do_search(std::map& re // list of fields which will be highlighted fully without snippeting const char *HIGHLIGHT_FULL_FIELDS = "highlight_full_fields"; + const char *HIGHLIGHT_FIELDS = "highlight_fields"; const char *HIGHLIGHT_START_TAG = "highlight_start_tag"; const char *HIGHLIGHT_END_TAG = "highlight_end_tag"; @@ -546,6 +547,10 @@ Option CollectionManager::do_search(std::map& re req_params[HIGHLIGHT_FULL_FIELDS] = ""; } + if(req_params.count(HIGHLIGHT_FIELDS) == 0) { + req_params[HIGHLIGHT_FIELDS] = ""; + } + if(req_params.count(HIGHLIGHT_START_TAG) == 0) { req_params[HIGHLIGHT_START_TAG] = ""; } @@ -768,7 +773,8 @@ Option CollectionManager::do_search(std::map& re static_cast(std::stol(req_params[LIMIT_HITS])), prioritize_exact_match, pre_segmented_query, - enable_overrides + enable_overrides, + req_params[HIGHLIGHT_FIELDS] ); uint64_t timeMillis = std::chrono::duration_cast( diff --git a/test/collection_specific_test.cpp b/test/collection_specific_test.cpp index fc9b3706..b5570d70 100644 --- a/test/collection_specific_test.cpp +++ b/test/collection_specific_test.cpp @@ -54,5 +54,92 @@ TEST_F(CollectionSpecificTest, SearchTextWithHyphen) { ASSERT_EQ(1, results["hits"].size()); ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str()); + collectionManager.drop_collection("coll1"); +} + +TEST_F(CollectionSpecificTest, ExplicitHighlightFieldsConfig) { + std::vector fields = {field("title", field_types::STRING, false), + field("description", field_types::STRING, false), + field("author", field_types::STRING, false), + field("points", field_types::INT32, false),}; + + Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get(); + + nlohmann::json doc; + doc["id"] = "0"; + doc["title"] = "The quick brown fox was too fast."; + doc["description"] = "A story about a brown fox who was fast."; + doc["author"] = "David Pernell"; + doc["points"] = 100; + + ASSERT_TRUE(coll1->add(doc.dump()).ok()); + + auto results = coll1->search("brown fox pernell", {"title"}, "", {}, {}, {2}, 10, + 1, FREQUENCY, {false}, 1, spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 4, "", 1, {}, {}, {}, 0, + "", "", {1}, 10000, true, false, true, "description,author").get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_EQ(1, results["hits"].size()); + + ASSERT_EQ("0", results["hits"][0]["document"]["id"].get()); + ASSERT_EQ(2, results["hits"][0]["highlights"].size()); + + ASSERT_EQ("description", results["hits"][0]["highlights"][0]["field"].get()); + ASSERT_EQ("A story about a brown fox who was fast.", results["hits"][0]["highlights"][0]["snippet"].get()); + + ASSERT_EQ("author", results["hits"][0]["highlights"][1]["field"].get()); + ASSERT_EQ("David Pernell", results["hits"][0]["highlights"][1]["snippet"].get()); + + // excluded fields are NOT respected if explicit highlight fields are provided + + results = coll1->search("brown fox pernell", {"title"}, "", {}, {}, {2}, 10, + 1, FREQUENCY, {false}, 1, spp::sparse_hash_set(), + {"description"}, 10, "", 30, 4, "", 1, {}, {}, {}, 0, + "", "", {1}, 10000, true, false, true, "description,author").get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_EQ(1, results["hits"].size()); + + ASSERT_EQ("0", results["hits"][0]["document"]["id"].get()); + ASSERT_EQ(2, results["hits"][0]["highlights"].size()); + ASSERT_FALSE(results["hits"][0]["document"].contains("description")); + + ASSERT_EQ("description", results["hits"][0]["highlights"][0]["field"].get()); + ASSERT_EQ("author", results["hits"][0]["highlights"][1]["field"].get()); + + // query not matching field selected for highlighting + + results = coll1->search("pernell", {"title", "author"}, "", {}, {}, {2}, 10, + 1, FREQUENCY, {false}, 1, spp::sparse_hash_set(), + {"description"}, 10, "", 30, 4, "", 1, {}, {}, {}, 0, + "", "", {1,1}, 10000, true, false, true, "description").get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_EQ(1, results["hits"].size()); + ASSERT_EQ(0, results["hits"][0]["highlights"].size()); + + // wildcard query with search field names + + results = coll1->search("*", {"title", "author"}, "", {}, {}, {2}, 10, + 1, FREQUENCY, {false}, 1, spp::sparse_hash_set(), + {"description"}, 10, "", 30, 4, "", 1, {}, {}, {}, 0, + "", "", {1,1}, 10000, true, false, true, "description,author").get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_EQ(1, results["hits"].size()); + ASSERT_EQ(0, results["hits"][0]["highlights"].size()); + + // wildcard query without search field names + + results = coll1->search("*", {}, "", {}, {}, {2}, 10, + 1, FREQUENCY, {false}, 1, spp::sparse_hash_set(), + {"description"}, 10, "", 30, 4, "", 1, {}, {}, {}, 0, + "", "", {1,1}, 10000, true, false, true, "description,author").get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_EQ(1, results["hits"].size()); + ASSERT_EQ(0, results["hits"][0]["highlights"].size()); + collectionManager.drop_collection("coll1"); } \ No newline at end of file