Mirror of https://github.com/typesense/typesense.git
Allow field highlighting independent of query_by fields.
commit dbeb00debe
parent 72a240888e
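The new highlight_fields search parameter (read from the request params by CollectionManager::do_search and threaded through Collection::search below) lets highlighting target fields other than the query_by fields; when it is left empty, highlighting falls back to the query_by fields as before. A rough usage sketch at the HTTP layer, assuming the collection and fields from the test added at the end of this diff and the usual documents/search route:

    GET /collections/coll1/documents/search?q=brown+fox+pernell&query_by=title&highlight_fields=description,author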
@@ -534,7 +534,8 @@ public:
                           size_t limit_hits=UINT32_MAX,
                           bool prioritize_exact_match=true,
                           bool pre_segmented_query=false,
-                          bool enable_overrides=true) const;
+                          bool enable_overrides=true,
+                          const std::string& highlight_fields="") const;

     Option<bool> get_filter_ids(const std::string & simple_filter_query,
                                 std::vector<std::pair<size_t, uint32_t*>>& index_ids);
@@ -497,7 +497,7 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
                                           const std::string & simple_facet_query,
                                           const size_t snippet_threshold,
                                           const size_t highlight_affix_num_tokens,
-                                          const std::string & highlight_full_fields,
+                                          const std::string& highlight_full_fields,
                                           size_t typo_tokens_threshold,
                                           const std::string& pinned_hits_str,
                                           const std::string& hidden_hits_str,
@@ -509,7 +509,8 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
                                           size_t limit_hits,
                                           bool prioritize_exact_match,
                                           bool pre_segmented_query,
-                                          bool enable_overrides) const {
+                                          bool enable_overrides,
+                                          const std::string& highlight_fields) const {

     std::shared_lock lock(mutex);

@@ -1089,18 +1090,35 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
     spp::sparse_hash_set<std::string> fields_highlighted_fully;
     StringUtils::split(highlight_full_fields, fields_highlighted_fully_vec, ",");

+    std::vector<std::string> fields_highlighted_vec;
+    std::vector<size_t> fields_highlighted_indices;
+    if(highlight_fields.empty()) {
+        for(size_t i = 0; i < search_fields.size(); i++) {
+            const auto& field_name = search_fields[i];
+            // should not pick excluded field for highlighting
+            if(exclude_fields.count(field_name) > 0) {
+                continue;
+            }
+
+            fields_highlighted_vec.emplace_back(field_name);
+            fields_highlighted_indices.push_back(i);
+        }
+    } else {
+        if(query != "*") {
+            StringUtils::split(highlight_fields, fields_highlighted_vec, ",");
+            for(size_t i = 0; i < fields_highlighted_vec.size(); i++) {
+                fields_highlighted_indices.push_back(0);
+            }
+        }
+    }
+
     for(std::string & highlight_full_field: fields_highlighted_fully_vec) {
         fields_highlighted_fully.emplace(highlight_full_field);
     }

-    for(size_t i = 0; i < search_fields.size(); i++) {
-        const std::string& field_name = search_fields[i];
-        const std::vector<std::string>& q_tokens = field_query_tokens[i].q_include_tokens;
-
-        // should not pick excluded field for highlighting
-        if(exclude_fields.count(field_name) > 0) {
-            continue;
-        }
+    for(size_t i = 0; i < fields_highlighted_vec.size(); i++) {
+        const std::string& field_name = fields_highlighted_vec[i];
+        const std::vector<std::string>& q_tokens = field_query_tokens[fields_highlighted_indices[i]].q_include_tokens;

         field search_field = search_schema.at(field_name);
         if(query != "*" && (search_field.type == field_types::STRING ||
@@ -495,6 +495,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re

     // list of fields which will be highlighted fully without snippeting
     const char *HIGHLIGHT_FULL_FIELDS = "highlight_full_fields";
+    const char *HIGHLIGHT_FIELDS = "highlight_fields";

     const char *HIGHLIGHT_START_TAG = "highlight_start_tag";
     const char *HIGHLIGHT_END_TAG = "highlight_end_tag";
@@ -546,6 +547,10 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
         req_params[HIGHLIGHT_FULL_FIELDS] = "";
     }

+    if(req_params.count(HIGHLIGHT_FIELDS) == 0) {
+        req_params[HIGHLIGHT_FIELDS] = "";
+    }
+
     if(req_params.count(HIGHLIGHT_START_TAG) == 0) {
         req_params[HIGHLIGHT_START_TAG] = "<mark>";
     }
@@ -768,7 +773,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
                                           static_cast<size_t>(std::stol(req_params[LIMIT_HITS])),
                                           prioritize_exact_match,
                                           pre_segmented_query,
-                                          enable_overrides
+                                          enable_overrides,
+                                          req_params[HIGHLIGHT_FIELDS]
                                           );

     uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
@@ -54,5 +54,92 @@ TEST_F(CollectionSpecificTest, SearchTextWithHyphen) {
     ASSERT_EQ(1, results["hits"].size());

     ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
     collectionManager.drop_collection("coll1");
 }
+
+TEST_F(CollectionSpecificTest, ExplicitHighlightFieldsConfig) {
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("description", field_types::STRING, false),
+                                 field("author", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
+
+    nlohmann::json doc;
+    doc["id"] = "0";
+    doc["title"] = "The quick brown fox was too fast.";
+    doc["description"] = "A story about a brown fox who was fast.";
+    doc["author"] = "David Pernell";
+    doc["points"] = 100;
+
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    auto results = coll1->search("brown fox pernell", {"title"}, "", {}, {}, {2}, 10,
+                                 1, FREQUENCY, {false}, 1, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {1}, 10000, true, false, true, "description,author").get();
+
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    ASSERT_EQ(1, results["hits"].size());
+
+    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
+    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
+
+    ASSERT_EQ("description", results["hits"][0]["highlights"][0]["field"].get<std::string>());
+    ASSERT_EQ("A story about a <mark>brown</mark> <mark>fox</mark> who was fast.", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    ASSERT_EQ("author", results["hits"][0]["highlights"][1]["field"].get<std::string>());
+    ASSERT_EQ("David <mark>Pernell</mark>", results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
+
+    // excluded fields are NOT respected if explicit highlight fields are provided
+
+    results = coll1->search("brown fox pernell", {"title"}, "", {}, {}, {2}, 10,
+                            1, FREQUENCY, {false}, 1, spp::sparse_hash_set<std::string>(),
+                            {"description"}, 10, "", 30, 4, "", 1, {}, {}, {}, 0,
+                            "<mark>", "</mark>", {1}, 10000, true, false, true, "description,author").get();
+
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    ASSERT_EQ(1, results["hits"].size());
+
+    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
+    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
+    ASSERT_FALSE(results["hits"][0]["document"].contains("description"));
+
+    ASSERT_EQ("description", results["hits"][0]["highlights"][0]["field"].get<std::string>());
+    ASSERT_EQ("author", results["hits"][0]["highlights"][1]["field"].get<std::string>());
+
+    // query not matching field selected for highlighting
+
+    results = coll1->search("pernell", {"title", "author"}, "", {}, {}, {2}, 10,
+                            1, FREQUENCY, {false}, 1, spp::sparse_hash_set<std::string>(),
+                            {"description"}, 10, "", 30, 4, "", 1, {}, {}, {}, 0,
+                            "<mark>", "</mark>", {1,1}, 10000, true, false, true, "description").get();
+
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ(0, results["hits"][0]["highlights"].size());
+
+    // wildcard query with search field names
+
+    results = coll1->search("*", {"title", "author"}, "", {}, {}, {2}, 10,
+                            1, FREQUENCY, {false}, 1, spp::sparse_hash_set<std::string>(),
+                            {"description"}, 10, "", 30, 4, "", 1, {}, {}, {}, 0,
+                            "<mark>", "</mark>", {1,1}, 10000, true, false, true, "description,author").get();
+
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ(0, results["hits"][0]["highlights"].size());
+
+    // wildcard query without search field names
+
+    results = coll1->search("*", {}, "", {}, {}, {2}, 10,
+                            1, FREQUENCY, {false}, 1, spp::sparse_hash_set<std::string>(),
+                            {"description"}, 10, "", 30, 4, "", 1, {}, {}, {}, 0,
+                            "<mark>", "</mark>", {1,1}, 10000, true, false, true, "description,author").get();
+
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ(0, results["hits"][0]["highlights"].size());
+
+    collectionManager.drop_collection("coll1");
+}