Split highlight into meta/snippet/full.

This commit is contained in:
Kishore Nallan 2022-07-27 17:47:21 +05:30
parent 0399c1ac72
commit 5a220e7398
3 changed files with 488 additions and 84 deletions

View File

@ -124,13 +124,15 @@ private:
std::string get_seq_id_key(uint32_t seq_id) const;
void highlight_result(const std::string& raw_query,
void highlight_result(const std::string& h_obj,
const field &search_field,
const size_t search_field_index,
const tsl::htrie_map<char, token_leaf>& qtoken_leaves,
const std::vector<std::string>& q_tokens,
const KV* field_order_kv, const nlohmann::json &document,
nlohmann::json& highlight_doc,
nlohmann::json& highlight_full_doc,
nlohmann::json& highlight_meta,
StringUtils & string_utils,
const size_t snippet_threshold,
const size_t highlight_affix_num_tokens,
@ -139,7 +141,9 @@ private:
const std::string& highlight_start_tag,
const std::string& highlight_end_tag,
const uint8_t* index_symbols,
highlight_t &highlight) const;
highlight_t &highlight,
bool& found_highlight,
bool& found_full_highlight) const;
void remove_document(const nlohmann::json & document, const uint32_t seq_id, bool remove_from_store);
@ -437,8 +441,8 @@ public:
void process_highlight_fields(const std::vector<std::string>& search_fields,
const tsl::htrie_set<char>& exclude_fields,
const tsl::htrie_set<char>& include_fields,
const string& highlight_fields,
const std::string& highlight_full_fields,
const std::vector<std::string>& highlight_field_names,
const std::vector<std::string>& highlight_full_field_names,
const std::vector<enable_t>& infixes,
std::vector<std::string>& q_tokens,
const tsl::htrie_map<char, token_leaf>& qtoken_set,
@ -454,29 +458,32 @@ public:
};
template<class T>
bool highlight_nested_field(const nlohmann::json& doc, nlohmann::json& obj,
bool highlight_nested_field(const nlohmann::json& hdoc, nlohmann::json& hobj,
const nlohmann::json& fdoc, nlohmann::json& fobj,
std::vector<std::string>& path_parts, size_t path_index, T func) {
if(path_index == path_parts.size()) {
// end of path: guaranteed to be a string
if(!obj.is_string()) {
if(!hobj.is_string()) {
return false;
}
func(obj);
func(hobj, fobj);
}
const std::string& fragment = path_parts[path_index];
const auto& it = obj.find(fragment);
const auto& it = hobj.find(fragment);
if(it != obj.end()) {
if(it != hobj.end()) {
if(it.value().is_array()) {
bool resolved = false;
for(auto& ele: it.value()) {
resolved |= highlight_nested_field(doc, ele, path_parts, path_index + 1, func);
for(size_t i = 0; i < it.value().size(); i++) {
auto& h_ele = it.value().at(i);
auto& f_ele = fobj[fragment][i];
resolved |= highlight_nested_field(hdoc, h_ele, fdoc, f_ele, path_parts, path_index + 1, func);
}
return resolved;
} else {
return highlight_nested_field(doc, it.value(), path_parts, path_index + 1, func);
return highlight_nested_field(hdoc, it.value(), fdoc, fobj[fragment], path_parts, path_index + 1, func);
}
} {
return false;

View File

@ -1269,15 +1269,23 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
// handle which fields have to be highlighted
std::vector<highlight_field_t> highlight_items;
tsl::htrie_set<char> hfield_names;
bool has_atleast_one_fully_highlighted_field = false;
std::vector<std::string> highlight_field_names;
StringUtils::split(highlight_fields, highlight_field_names, ",");
std::vector<std::string> highlight_full_field_names;
StringUtils::split(highlight_full_fields, highlight_full_field_names, ",");
if(query != "*") {
process_highlight_fields(search_fields, include_fields_full, exclude_fields_full, highlight_fields,
highlight_full_fields, infixes, q_tokens, search_params->qtoken_set,
highlight_items);
process_highlight_fields(search_fields, include_fields_full, exclude_fields_full,
highlight_field_names, highlight_full_field_names, infixes, q_tokens,
search_params->qtoken_set, highlight_items);
for(auto& highlight_item: highlight_items) {
hfield_names.insert(highlight_item.name);
if(highlight_item.fully_highlighted) {
has_atleast_one_fully_highlighted_field = true;
}
}
}
@ -1320,12 +1328,22 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
continue;
}
nlohmann::json highlight_doc;
nlohmann::json highlight_res;
if(!highlight_items.empty()) {
highlight_doc = document;
remove_flat_fields(highlight_doc);
highlight_doc.erase("id");
highlight_res["meta"] = nlohmann::json::object();
highlight_res["snippet"] = document;
remove_flat_fields(highlight_res["snippet"]);
highlight_res["snippet"].erase("id");
if(has_atleast_one_fully_highlighted_field) {
highlight_res["full"] = document;
remove_flat_fields(highlight_res["full"]);
highlight_res["full"].erase("id");
} else {
highlight_res["full"] = nlohmann::json::object();
}
}
nlohmann::json wrapper_doc;
@ -1333,6 +1351,9 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
std::vector<highlight_t> highlights;
StringUtils string_utils;
tsl::htrie_set<char> hfield_names;
tsl::htrie_set<char> h_full_field_names;
for(size_t i = 0; i < highlight_items.size(); i++) {
auto& highlight_item = highlight_items[i];
const std::string& field_name = highlight_item.name;
@ -1347,18 +1368,48 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
highlight_t highlight;
highlight.field = search_field.name;
bool found_highlight = false;
bool found_full_highlight = false;
highlight_result(raw_query, search_field, i, highlight_item.qtoken_leaves, q_tokens, field_order_kv,
document, highlight_doc, string_utils, snippet_threshold,
document, highlight_res["snippet"], highlight_res["full"], highlight_res["meta"],
string_utils, snippet_threshold,
highlight_affix_num_tokens, highlight_item.fully_highlighted, highlight_item.infix,
highlight_start_tag, highlight_end_tag, index_symbols, highlight);
highlight_start_tag, highlight_end_tag, index_symbols, highlight,
found_highlight, found_full_highlight);
if(!highlight.snippets.empty()) {
highlights.push_back(highlight);
}
if(found_highlight) {
hfield_names.insert(search_field.name);
if(found_full_highlight) {
h_full_field_names.insert(search_field.name);
}
}
}
}
// explicit highlight fields could be parent of searched fields, so we will take a pass at that
for(auto& hfield_name: highlight_full_field_names) {
auto it = h_full_field_names.equal_prefix_range(hfield_name);
if(it.first != it.second) {
h_full_field_names.insert(hfield_name);
}
}
for(auto& hfield_name: highlight_field_names) {
auto it = hfield_names.equal_prefix_range(hfield_name);
if(it.first != it.second) {
hfield_names.insert(hfield_name);
}
}
// remove fields from highlight doc that were not highlighted
prune_doc(highlight_doc, hfield_names, tsl::htrie_set<char>(), "");
prune_doc(highlight_res["snippet"], hfield_names, tsl::htrie_set<char>(), "");
prune_doc(highlight_res["full"], h_full_field_names, tsl::htrie_set<char>(), "");
std::sort(highlights.begin(), highlights.end());
for(const auto & highlight: highlights) {
@ -1403,7 +1454,7 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
remove_flat_fields(document);
wrapper_doc["document"] = document;
wrapper_doc["highlight"] = highlight_doc;
wrapper_doc["highlight"] = highlight_res;
if(field_order_kv->match_score_index == CURATED_RECORD_IDENTIFIER) {
wrapper_doc["curated"] = true;
@ -1698,8 +1749,8 @@ void Collection::populate_text_match_info(nlohmann::json& info, uint64_t match_s
void Collection::process_highlight_fields(const std::vector<std::string>& search_fields,
const tsl::htrie_set<char>& include_fields,
const tsl::htrie_set<char>& exclude_fields,
const string& highlight_fields,
const std::string& highlight_full_fields,
const std::vector<std::string>& highlight_field_names,
const std::vector<std::string>& highlight_full_field_names,
const std::vector<enable_t>& infixes,
std::vector<std::string>& q_tokens,
const tsl::htrie_map<char, token_leaf>& qtoken_set,
@ -1707,10 +1758,8 @@ void Collection::process_highlight_fields(const std::vector<std::string>& search
// identify full highlight fields
spp::sparse_hash_set<std::string> fields_highlighted_fully_set;
std::vector<std::string> fields_highlighted_fully_vec;
StringUtils::split(highlight_full_fields, fields_highlighted_fully_vec, ",");
std::vector<std::string> fields_highlighted_fully_expanded;
for(std::string & highlight_full_field: fields_highlighted_fully_vec) {
for(const std::string& highlight_full_field: highlight_full_field_names) {
extract_field_name(highlight_full_field, search_schema, fields_highlighted_fully_expanded);
}
@ -1730,7 +1779,7 @@ void Collection::process_highlight_fields(const std::vector<std::string>& search
}
}
if(highlight_fields.empty()) {
if(highlight_field_names.empty()) {
for(size_t i = 0; i < search_fields.size(); i++) {
const auto& field_name = search_fields[i];
if(exclude_fields.count(field_name) != 0) {
@ -1748,17 +1797,20 @@ void Collection::process_highlight_fields(const std::vector<std::string>& search
highlight_items.emplace_back(field_name, fully_highlighted, infixed);
}
} else {
std::vector<std::string> highlight_field_names;
StringUtils::split(highlight_fields, highlight_field_names, ",");
std::vector<std::string> highlight_field_names_expanded;
for(size_t i = 0; i < highlight_field_names.size(); i++) {
if(search_schema.count(highlight_field_names[i]) == 0) {
extract_field_name(highlight_field_names[i], search_schema, highlight_field_names_expanded);
}
for(size_t i = 0; i < highlight_field_names_expanded.size(); i++) {
const auto& highlight_field_name = highlight_field_names_expanded[i];
if(search_schema.count(highlight_field_name) == 0) {
// ignore fields not part of schema
continue;
}
bool fully_highlighted = (fields_highlighted_fully_set.count(highlight_field_names[i]) != 0);
bool infixed = (fields_infixed_set.count(highlight_field_names[i]) != 0);
highlight_items.emplace_back(highlight_field_names[i], fully_highlighted, infixed);
bool fully_highlighted = (fields_highlighted_fully_set.count(highlight_field_name) != 0);
bool infixed = (fields_infixed_set.count(highlight_field_name) != 0);
highlight_items.emplace_back(highlight_field_name, fully_highlighted, infixed);
}
}
@ -2047,6 +2099,8 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
const std::vector<std::string>& q_tokens,
const KV* field_order_kv, const nlohmann::json & document,
nlohmann::json& highlight_doc,
nlohmann::json& highlight_full_doc,
nlohmann::json& highlight_meta,
StringUtils & string_utils,
const size_t snippet_threshold,
const size_t highlight_affix_num_tokens,
@ -2055,12 +2109,16 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
const std::string& highlight_start_tag,
const std::string& highlight_end_tag,
const uint8_t* index_symbols,
highlight_t& highlight) const {
highlight_t& highlight,
bool& found_highlight,
bool& found_full_highlight) const {
if(q_tokens.size() == 1 && q_tokens[0] == "*") {
return;
}
tsl::htrie_set<char> matched_tokens;
bool is_cyrillic = Tokenizer::is_cyrillic(search_field.locale);
bool normalise = is_cyrillic ? false : true;
@ -2123,15 +2181,18 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
std::vector<std::string> path_parts;
StringUtils::split(search_field.name, path_parts, ".");
highlight_nested_field(highlight_doc, highlight_doc, path_parts, 0, [&](nlohmann::json& str_obj) {
highlight_nested_field(highlight_doc, highlight_doc, highlight_full_doc, highlight_full_doc,
path_parts, 0, [&](nlohmann::json& h_obj, nlohmann::json& f_obj) {
Match match;
match_index_t match_index(match, 0, 0);
int last_valid_offset_index = -1;
size_t last_valid_offset = 0;
std::string text = str_obj.get<std::string>();
bool found_higlight = handle_highlight_text(text, normalise, search_field, symbols_to_index,
token_separators, highlight, string_utils, is_cyrillic,
highlight_t array_highlight = highlight;
std::string text = h_obj.get<std::string>();
handle_highlight_text(text, normalise, search_field, symbols_to_index,
token_separators, array_highlight, string_utils, is_cyrillic,
highlight_affix_num_tokens,
qtoken_leaves, last_valid_offset_index, match,
last_raw_q_token,
@ -2139,11 +2200,30 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
raw_query_tokens,
last_valid_offset, highlight_start_tag, highlight_end_tag,
index_symbols, match_index);
if(!highlight.snippets.empty()) {
str_obj = highlight.snippets[0];
if(!array_highlight.snippets.empty()) {
h_obj = array_highlight.snippets[0];
found_highlight = found_highlight || true;
for(auto& token_vec: array_highlight.matched_tokens) {
for(auto& token: token_vec) {
matched_tokens.insert(token);
}
}
}
if(highlight_fully && !array_highlight.values.empty()) {
f_obj = array_highlight.values[0];;
found_full_highlight = found_full_highlight || true;
}
});
if(found_highlight) {
highlight_meta[search_field.name] = nlohmann::json::object();
for(auto it = matched_tokens.begin(); it != matched_tokens.end(); ++it) {
highlight_meta[search_field.name]["matched_tokens"].push_back(it.key());
}
}
return ;
}
@ -2228,6 +2308,18 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
last_valid_offset, highlight_start_tag, highlight_end_tag,
index_symbols, match_index);
if(!highlight.snippets.empty()) {
found_highlight = found_highlight || true;
for(auto& token_vec: highlight.matched_tokens) {
for(auto& token: token_vec) {
matched_tokens.insert(token);
}
}
}
if(!highlight.values.empty()) {
found_full_highlight = found_full_highlight || true;
}
}
highlight.field = search_field.name;
@ -2237,27 +2329,55 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
highlight.match_score = match_indices[0].match_score;
}
if(search_field.nested) {
std::vector<std::string> parts;
StringUtils::split(search_field.name, parts, ".");
nlohmann::json* val = highlight_doc.contains(parts[0]) ? &highlight_doc[parts[0]] : nullptr;
// in-place highlighting under the new highlight structure
std::vector<std::string> parts;
StringUtils::split(search_field.name, parts, ".");
nlohmann::json* hval = highlight_doc.contains(parts[0]) ? &highlight_doc[parts[0]] : nullptr;
nlohmann::json* fval = highlight_full_doc.contains(parts[0]) ? &highlight_full_doc[parts[0]] : nullptr;
for(size_t i = 1; val != nullptr && i < parts.size(); i++) {
const auto& part = parts[i];
if(val->contains(part)) {
val = &val->at(part);
} else {
val = nullptr;
for(size_t i = 1; hval != nullptr && i < parts.size(); i++) {
const auto& part = parts[i];
if(hval->contains(part)) {
hval = &hval->at(part);
} else {
hval = nullptr;
}
}
for(size_t i = 1; fval != nullptr && i < parts.size(); i++) {
const auto& part = parts[i];
if(fval->contains(part)) {
fval = &fval->at(part);
} else {
fval = nullptr;
}
}
if(hval) {
if(highlight.indices.empty()) {
*hval = highlight.snippets[0];
} else {
if(hval->is_array()) {
for(size_t hi = 0; hi < highlight.indices.size(); hi++) {
hval->at(highlight.indices[hi]) = highlight.snippets[hi];
}
}
}
if(val) {
highlight_meta[search_field.name] = nlohmann::json::object();
for(auto it = matched_tokens.begin(); it != matched_tokens.end(); ++it) {
highlight_meta[search_field.name]["matched_tokens"].push_back(it.key());
}
}
if(fval) {
if(!highlight.values.empty()) {
if(highlight.indices.empty()) {
*val = highlight.snippets[0];
*fval = highlight.values[0];
} else {
if(val->is_array()) {
for(size_t hi = 0; hi < highlight.indices.size(); hi++) {
val->at(highlight.indices[hi]) = highlight.snippets[hi];
if(fval->is_array()) {
for(size_t hi = 0; hi < highlight.values.size(); hi++) {
fval->at(highlight.indices[hi]) = highlight.values[hi];
}
}
}

View File

@ -326,45 +326,27 @@ TEST_F(CollectionNestedFieldsTest, SearchOnFieldsOnWildcardSchema) {
"locations":[
{
"address":{
"city":"Beaverton",
"products":[
"shoes",
"tshirts"
],
"street":"One Bowerman Drive"
},
"country":"USA"
}
},
{
"address":{
"city":"Thornhill",
"products":[
"sneakers",
"shoes"
],
"street":"175 <mark>Commerce</mark> Valley"
},
"country":"Canada"
}
}
]
})"_json;
ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"].dump());
ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
ASSERT_EQ(0, results["hits"][0]["highlights"].size());
// search specific nested fields
// search specific nested fields, only matching field is highlighted by default
results = coll1->search("one shoe", {"locations.address.street", "employees.tags"}, "", {}, sort_fields,
{0}, 10, 1, FREQUENCY, {true}).get();
ASSERT_EQ(1, results["hits"].size());
ASSERT_EQ(doc, results["hits"][0]["document"]);
highlight_doc = R"({
"employees":{
"tags":[
"senior plumber",
"electrician"
]
},
"locations":[
{
"address":{
@ -373,13 +355,13 @@ TEST_F(CollectionNestedFieldsTest, SearchOnFieldsOnWildcardSchema) {
},
{
"address":{
"street":"<mark>One</mark> Bowerman Drive"
"street":"175 Commerce Valley"
}
}
]
})"_json;
ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"].dump());
ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
ASSERT_EQ(0, results["hits"][0]["highlights"].size());
// try to search nested fields that don't exist
@ -451,6 +433,301 @@ TEST_F(CollectionNestedFieldsTest, IncludeExcludeFields) {
ASSERT_EQ(R"({"locations":[{"address":{"products":["shoes","tshirts"]}},{"address":{"products":["sneakers","shoes"]}}]})", doc.dump());
}
TEST_F(CollectionNestedFieldsTest, HighlightNestedFieldFully) {
std::vector<field> fields = {field(".*", field_types::AUTO, false, true)};
auto op = collectionManager.create_collection("coll1", 1, fields, "", 0, field_types::AUTO);
ASSERT_TRUE(op.ok());
Collection* coll1 = op.get();
auto doc = R"({
"company_names": ["Space Corp. LLC", "Drive One Inc."],
"company": {"names": ["Space Corp. LLC", "Drive One Inc."]},
"locations": [
{ "pincode": 100, "country": "USA",
"address": { "street": "One Bowerman Drive", "city": "Beaverton", "products": ["shoes", "tshirts"] }
},
{ "pincode": 200, "country": "Canada",
"address": { "street": "175 Commerce Drive", "city": "Thornhill", "products": ["sneakers", "shoes"] }
}
]
})"_json;
auto add_op = coll1->add(doc.dump(), CREATE);
ASSERT_TRUE(add_op.ok());
// search both simply nested and deeply nested array-of-objects
auto results = coll1->search("One", {"locations.address"}, "", {}, sort_fields, {0}, 10, 1,
token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address").get();
ASSERT_EQ(1, results["hits"].size());
auto highlight_doc = R"({
"locations":[
{
"address":{
"street":"<mark>One</mark> Bowerman Drive"
}
},
{
"address":{
"street":"175 Commerce Drive"
}
}
]
})"_json;
auto highlight_full_doc = R"({
"locations":[
{
"address":{
"city":"Beaverton",
"products":[
"shoes",
"tshirts"
],
"street":"<mark>One</mark> Bowerman Drive"
}
},
{
"address":{
"city":"Thornhill",
"products":[
"sneakers",
"shoes"
],
"street":"175 Commerce Drive"
}
}
]
})"_json;
ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["full"].dump());
ASSERT_EQ(0, results["hits"][0]["highlights"].size());
// repeating token
results = coll1->search("drive", {"locations.address"}, "", {}, sort_fields, {0}, 10, 1,
token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address").get();
ASSERT_EQ(1, results["hits"].size());
highlight_doc = R"({
"locations":[
{
"address":{
"street":"One Bowerman <mark>Drive</mark>"
}
},
{
"address":{
"street":"175 Commerce <mark>Drive</mark>"
}
}
]
})"_json;
ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
ASSERT_EQ(0, results["hits"][0]["highlights"].size());
// nested array of array, highlighting parent of searched nested field
results = coll1->search("shoes", {"locations.address.products"}, "", {}, sort_fields, {0}, 10, 1,
token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address",
20, {}, {}, {}, 0, "<mark>", "</mark>", {}, 1000, true, false, true,
"locations.address").get();
ASSERT_EQ(1, results["hits"].size());
highlight_full_doc = R"({
"locations":[
{
"address":{
"city":"Beaverton",
"products":[
"<mark>shoes</mark>",
"tshirts"
],
"street":"One Bowerman Drive"
}
},
{
"address":{
"city":"Thornhill",
"products":[
"sneakers",
"<mark>shoes</mark>"
],
"street":"175 Commerce Drive"
}
}
]
})"_json;
ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["full"].dump());
ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
// full highlighting only one of the 3 highlight fields
results = coll1->search("drive", {"company.names", "company_names", "locations.address"}, "", {}, sort_fields, {0}, 10, 1,
token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address",
20, {}, {}, {}, 0, "<mark>", "</mark>", {}, 1000, true, false, true,
"company.names,company_names,locations.address").get();
highlight_full_doc = R"({
"locations":[
{
"address":{
"city":"Beaverton",
"products":[
"shoes",
"tshirts"
],
"street":"One Bowerman <mark>Drive</mark>"
}
},
{
"address":{
"city":"Thornhill",
"products":[
"sneakers",
"shoes"
],
"street":"175 Commerce <mark>Drive</mark>"
}
}
]
})"_json;
highlight_doc = R"({
"company":{
"names": ["Space Corp. LLC", "<mark>Drive</mark> One Inc."]
},
"company_names": ["Space Corp. LLC", "<mark>Drive</mark> One Inc."],
"locations":[
{
"address":{
"city":"Beaverton",
"products":[
"shoes",
"tshirts"
],
"street":"One Bowerman <mark>Drive</mark>"
}
},
{
"address":{
"city":"Thornhill",
"products":[
"sneakers",
"shoes"
],
"street":"175 Commerce <mark>Drive</mark>"
}
}
]
})"_json;
ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["full"].dump());
ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
// if highlight fields not provided, only matching sub-fields should appear in highlight
results = coll1->search("space", {"company.names", "company_names", "locations.address"}, "", {}, sort_fields, {0}, 10, 1,
token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4).get();
highlight_doc = R"({
"company":{
"names": ["<mark>Space</mark> Corp. LLC", "Drive One Inc."]
},
"company_names": ["<mark>Space</mark> Corp. LLC", "Drive One Inc."]
})"_json;
ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
ASSERT_EQ(0, results["hits"][0]["highlight"]["full"].size());
// only a single highlight full field provided
results = coll1->search("space", {"company.names", "company_names", "locations.address"}, "", {}, sort_fields, {0}, 10, 1,
token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "company.names").get();
highlight_full_doc = R"({
"company":{
"names":[
"<mark>Space</mark> Corp. LLC",
"Drive One Inc."
]
}
})"_json;
highlight_doc = R"({
"company":{
"names":[
"<mark>Space</mark> Corp. LLC",
"Drive One Inc."
]
},
"company_names":[
"<mark>Space</mark> Corp. LLC",
"Drive One Inc."
]
})"_json;
ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["full"].dump());
}
TEST_F(CollectionNestedFieldsTest, HighlightShouldHaveMeta) {
std::vector<field> fields = {field(".*", field_types::AUTO, false, true)};
auto op = collectionManager.create_collection("coll1", 1, fields, "", 0, field_types::AUTO);
ASSERT_TRUE(op.ok());
Collection* coll1 = op.get();
auto doc = R"({
"company_names": ["Quick brown fox jumped.", "The red fox was not fast."],
"details": {
"description": "Quick set, go.",
"names": ["Quick brown fox jumped.", "The red fox was not fast."]
},
"locations": [
{
"address": { "street": "Brown Shade Avenue" }
},
{
"address": { "street": "Graywolf Lane" }
}
]
})"_json;
auto add_op = coll1->add(doc.dump(), CREATE);
ASSERT_TRUE(add_op.ok());
// search both simply nested and deeply nested array-of-objects
auto results = coll1->search("brown fox", {"company_names", "details", "locations"},
"", {}, sort_fields, {0}, 10, 1,
token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address").get();
ASSERT_EQ(3, results["hits"][0]["highlight"]["meta"].size());
ASSERT_EQ(1, results["hits"][0]["highlight"]["meta"]["company_names"].size());
ASSERT_EQ(2, results["hits"][0]["highlight"]["meta"]["company_names"]["matched_tokens"].size());
ASSERT_EQ("brown", results["hits"][0]["highlight"]["meta"]["company_names"]["matched_tokens"][0]);
ASSERT_EQ("fox", results["hits"][0]["highlight"]["meta"]["company_names"]["matched_tokens"][1]);
ASSERT_EQ(2, results["hits"][0]["highlight"]["meta"]["details.names"]["matched_tokens"].size());
ASSERT_EQ("brown", results["hits"][0]["highlight"]["meta"]["details.names"]["matched_tokens"][0]);
ASSERT_EQ("fox", results["hits"][0]["highlight"]["meta"]["details.names"]["matched_tokens"][1]);
ASSERT_EQ(1, results["hits"][0]["highlight"]["meta"]["locations.address.street"]["matched_tokens"].size());
ASSERT_EQ("Brown", results["hits"][0]["highlight"]["meta"]["locations.address.street"]["matched_tokens"][0]);
}
TEST_F(CollectionNestedFieldsTest, GroupByOnNestedFieldsWithWildcardSchema) {
std::vector<field> fields = {field(".*", field_types::AUTO, false, true),
field("education.name", field_types::STRING_ARRAY, true, true),