Allow field highlighting independent of query_by fields.

This commit is contained in:
Kishore Nallan 2021-06-17 18:24:11 +05:30
parent 72a240888e
commit dbeb00debe
4 changed files with 124 additions and 12 deletions

View File

@ -534,7 +534,8 @@ public:
size_t limit_hits=UINT32_MAX,
bool prioritize_exact_match=true,
bool pre_segmented_query=false,
bool enable_overrides=true) const;
bool enable_overrides=true,
const std::string& highlight_fields="") const;
Option<bool> get_filter_ids(const std::string & simple_filter_query,
std::vector<std::pair<size_t, uint32_t*>>& index_ids);

View File

@ -497,7 +497,7 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
const std::string & simple_facet_query,
const size_t snippet_threshold,
const size_t highlight_affix_num_tokens,
const std::string & highlight_full_fields,
const std::string& highlight_full_fields,
size_t typo_tokens_threshold,
const std::string& pinned_hits_str,
const std::string& hidden_hits_str,
@ -509,7 +509,8 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
size_t limit_hits,
bool prioritize_exact_match,
bool pre_segmented_query,
bool enable_overrides) const {
bool enable_overrides,
const std::string& highlight_fields) const {
std::shared_lock lock(mutex);
@ -1089,18 +1090,35 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
spp::sparse_hash_set<std::string> fields_highlighted_fully;
StringUtils::split(highlight_full_fields, fields_highlighted_fully_vec, ",");
std::vector<std::string> fields_highlighted_vec;
std::vector<size_t> fields_highlighted_indices;
if(highlight_fields.empty()) {
for(size_t i = 0; i < search_fields.size(); i++) {
const auto& field_name = search_fields[i];
// should not pick excluded field for highlighting
if(exclude_fields.count(field_name) > 0) {
continue;
}
fields_highlighted_vec.emplace_back(field_name);
fields_highlighted_indices.push_back(i);
}
} else {
if(query != "*") {
StringUtils::split(highlight_fields, fields_highlighted_vec, ",");
for(size_t i = 0; i < fields_highlighted_vec.size(); i++) {
fields_highlighted_indices.push_back(0);
}
}
}
for(std::string & highlight_full_field: fields_highlighted_fully_vec) {
fields_highlighted_fully.emplace(highlight_full_field);
}
for(size_t i = 0; i < search_fields.size(); i++) {
const std::string& field_name = search_fields[i];
const std::vector<std::string>& q_tokens = field_query_tokens[i].q_include_tokens;
// should not pick excluded field for highlighting
if(exclude_fields.count(field_name) > 0) {
continue;
}
for(size_t i = 0; i < fields_highlighted_vec.size(); i++) {
const std::string& field_name = fields_highlighted_vec[i];
const std::vector<std::string>& q_tokens = field_query_tokens[fields_highlighted_indices[i]].q_include_tokens;
field search_field = search_schema.at(field_name);
if(query != "*" && (search_field.type == field_types::STRING ||

View File

@ -495,6 +495,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
// list of fields which will be highlighted fully without snippeting
const char *HIGHLIGHT_FULL_FIELDS = "highlight_full_fields";
const char *HIGHLIGHT_FIELDS = "highlight_fields";
const char *HIGHLIGHT_START_TAG = "highlight_start_tag";
const char *HIGHLIGHT_END_TAG = "highlight_end_tag";
@ -546,6 +547,10 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
req_params[HIGHLIGHT_FULL_FIELDS] = "";
}
if(req_params.count(HIGHLIGHT_FIELDS) == 0) {
req_params[HIGHLIGHT_FIELDS] = "";
}
if(req_params.count(HIGHLIGHT_START_TAG) == 0) {
req_params[HIGHLIGHT_START_TAG] = "<mark>";
}
@ -768,7 +773,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
static_cast<size_t>(std::stol(req_params[LIMIT_HITS])),
prioritize_exact_match,
pre_segmented_query,
enable_overrides
enable_overrides,
req_params[HIGHLIGHT_FIELDS]
);
uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(

View File

@ -54,5 +54,92 @@ TEST_F(CollectionSpecificTest, SearchTextWithHyphen) {
ASSERT_EQ(1, results["hits"].size());
ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
collectionManager.drop_collection("coll1");
}
// Exercises the trailing `highlight_fields` argument of Collection::search:
// the fields named there are the ones highlighted, regardless of which fields
// the query was actually run against.
TEST_F(CollectionSpecificTest, ExplicitHighlightFieldsConfig) {
    std::vector<field> schema_fields = {
        field("title", field_types::STRING, false),
        field("description", field_types::STRING, false),
        field("author", field_types::STRING, false),
        field("points", field_types::INT32, false),
    };

    Collection* coll1 = collectionManager.create_collection("coll1", 1, schema_fields, "points").get();

    // Single document whose title, description and author each contain a
    // different subset of the query words used below.
    nlohmann::json record;
    record["id"] = "0";
    record["title"] = "The quick brown fox was too fast.";
    record["description"] = "A story about a brown fox who was fast.";
    record["author"] = "David Pernell";
    record["points"] = 100;

    ASSERT_TRUE(coll1->add(record.dump()).ok());

    // Scenario 1: search only `title`, but ask for highlights on
    // `description` and `author`.
    {
        auto res = coll1->search("brown fox pernell", {"title"}, "", {}, {}, {2}, 10,
                                 1, FREQUENCY, {false}, 1, spp::sparse_hash_set<std::string>(),
                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
                                 "<mark>", "</mark>", {1}, 10000, true, false, true, "description,author").get();

        ASSERT_EQ(1, res["found"].get<size_t>());
        ASSERT_EQ(1, res["hits"].size());

        auto& hit = res["hits"][0];
        ASSERT_EQ("0", hit["document"]["id"].get<std::string>());

        auto& highlights = hit["highlights"];
        ASSERT_EQ(2, highlights.size());

        ASSERT_EQ("description", highlights[0]["field"].get<std::string>());
        ASSERT_EQ("A story about a <mark>brown</mark> <mark>fox</mark> who was fast.", highlights[0]["snippet"].get<std::string>());

        ASSERT_EQ("author", highlights[1]["field"].get<std::string>());
        ASSERT_EQ("David <mark>Pernell</mark>", highlights[1]["snippet"].get<std::string>());
    }

    // Scenario 2: excluded fields are NOT respected if explicit highlight
    // fields are provided -- `description` is dropped from the returned
    // document yet still shows up among the highlights.
    {
        auto res = coll1->search("brown fox pernell", {"title"}, "", {}, {}, {2}, 10,
                                 1, FREQUENCY, {false}, 1, spp::sparse_hash_set<std::string>(),
                                 {"description"}, 10, "", 30, 4, "", 1, {}, {}, {}, 0,
                                 "<mark>", "</mark>", {1}, 10000, true, false, true, "description,author").get();

        ASSERT_EQ(1, res["found"].get<size_t>());
        ASSERT_EQ(1, res["hits"].size());

        auto& hit = res["hits"][0];
        ASSERT_EQ("0", hit["document"]["id"].get<std::string>());

        auto& highlights = hit["highlights"];
        ASSERT_EQ(2, highlights.size());

        ASSERT_FALSE(hit["document"].contains("description"));

        ASSERT_EQ("description", highlights[0]["field"].get<std::string>());
        ASSERT_EQ("author", highlights[1]["field"].get<std::string>());
    }

    // Scenario 3: query not matching field selected for highlighting -- the
    // document is found, but no highlight entry is produced.
    {
        auto res = coll1->search("pernell", {"title", "author"}, "", {}, {}, {2}, 10,
                                 1, FREQUENCY, {false}, 1, spp::sparse_hash_set<std::string>(),
                                 {"description"}, 10, "", 30, 4, "", 1, {}, {}, {}, 0,
                                 "<mark>", "</mark>", {1,1}, 10000, true, false, true, "description").get();

        ASSERT_EQ(1, res["found"].get<size_t>());
        ASSERT_EQ(1, res["hits"].size());
        ASSERT_EQ(0, res["hits"][0]["highlights"].size());
    }

    // Scenario 4: wildcard query with search field names -- nothing is
    // highlighted even though highlight fields are given.
    {
        auto res = coll1->search("*", {"title", "author"}, "", {}, {}, {2}, 10,
                                 1, FREQUENCY, {false}, 1, spp::sparse_hash_set<std::string>(),
                                 {"description"}, 10, "", 30, 4, "", 1, {}, {}, {}, 0,
                                 "<mark>", "</mark>", {1,1}, 10000, true, false, true, "description,author").get();

        ASSERT_EQ(1, res["found"].get<size_t>());
        ASSERT_EQ(1, res["hits"].size());
        ASSERT_EQ(0, res["hits"][0]["highlights"].size());
    }

    // Scenario 5: wildcard query without search field names -- same
    // expectation as above: a hit, but no highlights.
    {
        auto res = coll1->search("*", {}, "", {}, {}, {2}, 10,
                                 1, FREQUENCY, {false}, 1, spp::sparse_hash_set<std::string>(),
                                 {"description"}, 10, "", 30, 4, "", 1, {}, {}, {}, 0,
                                 "<mark>", "</mark>", {1,1}, 10000, true, false, true, "description,author").get();

        ASSERT_EQ(1, res["found"].get<size_t>());
        ASSERT_EQ(1, res["hits"].size());
        ASSERT_EQ(0, res["hits"][0]["highlights"].size());
    }

    collectionManager.drop_collection("coll1");
}