mirror of https://github.com/typesense/typesense.git
Split highlight into meta/snippet/full.
parent 0399c1ac72
commit 5a220e7398
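Illustrative sketch (not part of this commit): after this change, each hit's "highlight" object carries three keys instead of a single highlight tree — "snippet" for truncated highlights, "full" for fully highlighted field values (populated only for fields requested via highlight_full_fields), and "meta" for the matched tokens per field. Only the key names come from the diff below; the field values here are hypothetical.

#include <iostream>
#include <nlohmann/json.hpp>

// Minimal sketch of the new per-hit highlight shape.
int main() {
    nlohmann::json hit_highlight = R"({
        "meta":    { "company_names": { "matched_tokens": ["Drive"] } },
        "snippet": { "company_names": ["Space Corp. LLC", "<mark>Drive</mark> One Inc."] },
        "full":    {}
    })"_json;

    std::cout << hit_highlight.dump(2) << std::endl;
    return 0;
}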
@@ -124,13 +124,15 @@ private:

    std::string get_seq_id_key(uint32_t seq_id) const;

    void highlight_result(const std::string& raw_query,
    void highlight_result(const std::string& h_obj,
                          const field &search_field,
                          const size_t search_field_index,
                          const tsl::htrie_map<char, token_leaf>& qtoken_leaves,
                          const std::vector<std::string>& q_tokens,
                          const KV* field_order_kv, const nlohmann::json &document,
                          nlohmann::json& highlight_doc,
                          nlohmann::json& highlight_full_doc,
                          nlohmann::json& highlight_meta,
                          StringUtils & string_utils,
                          const size_t snippet_threshold,
                          const size_t highlight_affix_num_tokens,
@@ -139,7 +141,9 @@ private:
                          const std::string& highlight_start_tag,
                          const std::string& highlight_end_tag,
                          const uint8_t* index_symbols,
                          highlight_t &highlight) const;
                          highlight_t &highlight,
                          bool& found_highlight,
                          bool& found_full_highlight) const;

    void remove_document(const nlohmann::json & document, const uint32_t seq_id, bool remove_from_store);

@@ -437,8 +441,8 @@ public:
    void process_highlight_fields(const std::vector<std::string>& search_fields,
                                  const tsl::htrie_set<char>& exclude_fields,
                                  const tsl::htrie_set<char>& include_fields,
                                  const string& highlight_fields,
                                  const std::string& highlight_full_fields,
                                  const std::vector<std::string>& highlight_field_names,
                                  const std::vector<std::string>& highlight_full_field_names,
                                  const std::vector<enable_t>& infixes,
                                  std::vector<std::string>& q_tokens,
                                  const tsl::htrie_map<char, token_leaf>& qtoken_set,
@@ -454,29 +458,32 @@ public:
};

template<class T>
bool highlight_nested_field(const nlohmann::json& doc, nlohmann::json& obj,
bool highlight_nested_field(const nlohmann::json& hdoc, nlohmann::json& hobj,
                            const nlohmann::json& fdoc, nlohmann::json& fobj,
                            std::vector<std::string>& path_parts, size_t path_index, T func) {
    if(path_index == path_parts.size()) {
        // end of path: guaranteed to be a string
        if(!obj.is_string()) {
        if(!hobj.is_string()) {
            return false;
        }

        func(obj);
        func(hobj, fobj);
    }

    const std::string& fragment = path_parts[path_index];
    const auto& it = obj.find(fragment);
    const auto& it = hobj.find(fragment);

    if(it != obj.end()) {
    if(it != hobj.end()) {
        if(it.value().is_array()) {
            bool resolved = false;
            for(auto& ele: it.value()) {
                resolved |= highlight_nested_field(doc, ele, path_parts, path_index + 1, func);
            for(size_t i = 0; i < it.value().size(); i++) {
                auto& h_ele = it.value().at(i);
                auto& f_ele = fobj[fragment][i];
                resolved |= highlight_nested_field(hdoc, h_ele, fdoc, f_ele, path_parts, path_index + 1, func);
            }
            return resolved;
        } else {
            return highlight_nested_field(doc, it.value(), path_parts, path_index + 1, func);
            return highlight_nested_field(hdoc, it.value(), fdoc, fobj[fragment], path_parts, path_index + 1, func);
        }
    }
    return false;
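Hedged sketch of the dual-tree walk introduced above: the template now descends the snippet document (hobj) and the full-highlight document (fobj) in lockstep, so the callback can rewrite both copies of each leaf string. This is a simplified restatement under that reading, not the committed code; the name walk_pair is hypothetical, and the explicit return true after the callback is an assumption.

#include <string>
#include <vector>
#include <nlohmann/json.hpp>

template <class T>
bool walk_pair(nlohmann::json& hobj, nlohmann::json& fobj,
               const std::vector<std::string>& path_parts, size_t path_index, T func) {
    if(path_index == path_parts.size()) {
        // end of path: only string leaves are highlightable
        if(!hobj.is_string()) {
            return false;
        }
        func(hobj, fobj);   // rewrite snippet and full values together
        return true;
    }

    const std::string& fragment = path_parts[path_index];
    auto it = hobj.find(fragment);
    if(it == hobj.end()) {
        return false;
    }

    if(it.value().is_array()) {
        bool resolved = false;
        for(size_t i = 0; i < it.value().size(); i++) {
            // index i addresses the same element in both trees
            resolved |= walk_pair(it.value().at(i), fobj[fragment][i],
                                  path_parts, path_index + 1, func);
        }
        return resolved;
    }
    return walk_pair(it.value(), fobj[fragment], path_parts, path_index + 1, func);
}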
@@ -1269,15 +1269,23 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
    // handle which fields have to be highlighted

    std::vector<highlight_field_t> highlight_items;
    tsl::htrie_set<char> hfield_names;
    bool has_atleast_one_fully_highlighted_field = false;

    std::vector<std::string> highlight_field_names;
    StringUtils::split(highlight_fields, highlight_field_names, ",");

    std::vector<std::string> highlight_full_field_names;
    StringUtils::split(highlight_full_fields, highlight_full_field_names, ",");

    if(query != "*") {
        process_highlight_fields(search_fields, include_fields_full, exclude_fields_full, highlight_fields,
                                 highlight_full_fields, infixes, q_tokens, search_params->qtoken_set,
                                 highlight_items);
        process_highlight_fields(search_fields, include_fields_full, exclude_fields_full,
                                 highlight_field_names, highlight_full_field_names, infixes, q_tokens,
                                 search_params->qtoken_set, highlight_items);

        for(auto& highlight_item: highlight_items) {
            hfield_names.insert(highlight_item.name);
            if(highlight_item.fully_highlighted) {
                has_atleast_one_fully_highlighted_field = true;
            }
        }
    }

@@ -1320,12 +1328,22 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
            continue;
        }

        nlohmann::json highlight_doc;
        nlohmann::json highlight_res;

        if(!highlight_items.empty()) {
            highlight_doc = document;
            remove_flat_fields(highlight_doc);
            highlight_doc.erase("id");
            highlight_res["meta"] = nlohmann::json::object();

            highlight_res["snippet"] = document;
            remove_flat_fields(highlight_res["snippet"]);
            highlight_res["snippet"].erase("id");

            if(has_atleast_one_fully_highlighted_field) {
                highlight_res["full"] = document;
                remove_flat_fields(highlight_res["full"]);
                highlight_res["full"].erase("id");
            } else {
                highlight_res["full"] = nlohmann::json::object();
            }
        }

        nlohmann::json wrapper_doc;
@@ -1333,6 +1351,9 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
        std::vector<highlight_t> highlights;
        StringUtils string_utils;

        tsl::htrie_set<char> hfield_names;
        tsl::htrie_set<char> h_full_field_names;

        for(size_t i = 0; i < highlight_items.size(); i++) {
            auto& highlight_item = highlight_items[i];
            const std::string& field_name = highlight_item.name;
@@ -1347,18 +1368,48 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,

            highlight_t highlight;
            highlight.field = search_field.name;

            bool found_highlight = false;
            bool found_full_highlight = false;

            highlight_result(raw_query, search_field, i, highlight_item.qtoken_leaves, q_tokens, field_order_kv,
                             document, highlight_doc, string_utils, snippet_threshold,
                             document, highlight_res["snippet"], highlight_res["full"], highlight_res["meta"],
                             string_utils, snippet_threshold,
                             highlight_affix_num_tokens, highlight_item.fully_highlighted, highlight_item.infix,
                             highlight_start_tag, highlight_end_tag, index_symbols, highlight);
                             highlight_start_tag, highlight_end_tag, index_symbols, highlight,
                             found_highlight, found_full_highlight);
            if(!highlight.snippets.empty()) {
                highlights.push_back(highlight);
            }

            if(found_highlight) {
                hfield_names.insert(search_field.name);
                if(found_full_highlight) {
                    h_full_field_names.insert(search_field.name);
                }
            }
        }
    }

    // explicit highlight fields could be parent of searched fields, so we will take a pass at that
    for(auto& hfield_name: highlight_full_field_names) {
        auto it = h_full_field_names.equal_prefix_range(hfield_name);
        if(it.first != it.second) {
            h_full_field_names.insert(hfield_name);
        }
    }

    for(auto& hfield_name: highlight_field_names) {
        auto it = hfield_names.equal_prefix_range(hfield_name);
        if(it.first != it.second) {
            hfield_names.insert(hfield_name);
        }
    }

    // remove fields from highlight doc that were not highlighted
    prune_doc(highlight_doc, hfield_names, tsl::htrie_set<char>(), "");
    prune_doc(highlight_res["snippet"], hfield_names, tsl::htrie_set<char>(), "");
    prune_doc(highlight_res["full"], h_full_field_names, tsl::htrie_set<char>(), "");

    std::sort(highlights.begin(), highlights.end());

    for(const auto & highlight: highlights) {
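Hedged illustration of the "parent field" pass above: if the user asks to highlight "locations.address" but the match landed on the child field "locations.address.street", equal_prefix_range() detects the child so the parent name can be admitted too, and pruning then keeps the whole subtree. A minimal standalone sketch, assuming only the tsl::htrie_set API used in the diff; the variable names are illustrative.

#include <iostream>
#include <string>
#include <tsl/htrie_set.h>

int main() {
    tsl::htrie_set<char> highlighted;
    highlighted.insert("locations.address.street");  // field that actually matched

    std::string requested = "locations.address";     // parent field the user asked for
    auto range = highlighted.equal_prefix_range(requested);
    if(range.first != range.second) {
        highlighted.insert(requested);               // admit the parent alongside the child
    }

    std::cout << highlighted.size() << std::endl;    // prints 2
    return 0;
}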
@@ -1403,7 +1454,7 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
        remove_flat_fields(document);

        wrapper_doc["document"] = document;
        wrapper_doc["highlight"] = highlight_doc;
        wrapper_doc["highlight"] = highlight_res;

        if(field_order_kv->match_score_index == CURATED_RECORD_IDENTIFIER) {
            wrapper_doc["curated"] = true;
@@ -1698,8 +1749,8 @@ void Collection::populate_text_match_info(nlohmann::json& info, uint64_t match_s
void Collection::process_highlight_fields(const std::vector<std::string>& search_fields,
                                          const tsl::htrie_set<char>& include_fields,
                                          const tsl::htrie_set<char>& exclude_fields,
                                          const string& highlight_fields,
                                          const std::string& highlight_full_fields,
                                          const std::vector<std::string>& highlight_field_names,
                                          const std::vector<std::string>& highlight_full_field_names,
                                          const std::vector<enable_t>& infixes,
                                          std::vector<std::string>& q_tokens,
                                          const tsl::htrie_map<char, token_leaf>& qtoken_set,
@@ -1707,10 +1758,8 @@ void Collection::process_highlight_fields(const std::vector<std::string>& search

    // identify full highlight fields
    spp::sparse_hash_set<std::string> fields_highlighted_fully_set;
    std::vector<std::string> fields_highlighted_fully_vec;
    StringUtils::split(highlight_full_fields, fields_highlighted_fully_vec, ",");
    std::vector<std::string> fields_highlighted_fully_expanded;
    for(std::string & highlight_full_field: fields_highlighted_fully_vec) {
    for(const std::string& highlight_full_field: highlight_full_field_names) {
        extract_field_name(highlight_full_field, search_schema, fields_highlighted_fully_expanded);
    }

@@ -1730,7 +1779,7 @@ void Collection::process_highlight_fields(const std::vector<std::string>& search
        }
    }

    if(highlight_fields.empty()) {
    if(highlight_field_names.empty()) {
        for(size_t i = 0; i < search_fields.size(); i++) {
            const auto& field_name = search_fields[i];
            if(exclude_fields.count(field_name) != 0) {
@@ -1748,17 +1797,20 @@ void Collection::process_highlight_fields(const std::vector<std::string>& search
            highlight_items.emplace_back(field_name, fully_highlighted, infixed);
        }
    } else {
        std::vector<std::string> highlight_field_names;
        StringUtils::split(highlight_fields, highlight_field_names, ",");

        std::vector<std::string> highlight_field_names_expanded;
        for(size_t i = 0; i < highlight_field_names.size(); i++) {
            if(search_schema.count(highlight_field_names[i]) == 0) {
            extract_field_name(highlight_field_names[i], search_schema, highlight_field_names_expanded);
        }

        for(size_t i = 0; i < highlight_field_names_expanded.size(); i++) {
            const auto& highlight_field_name = highlight_field_names_expanded[i];
            if(search_schema.count(highlight_field_name) == 0) {
                // ignore fields not part of schema
                continue;
            }
            bool fully_highlighted = (fields_highlighted_fully_set.count(highlight_field_names[i]) != 0);
            bool infixed = (fields_infixed_set.count(highlight_field_names[i]) != 0);
            highlight_items.emplace_back(highlight_field_names[i], fully_highlighted, infixed);
            bool fully_highlighted = (fields_highlighted_fully_set.count(highlight_field_name) != 0);
            bool infixed = (fields_infixed_set.count(highlight_field_name) != 0);
            highlight_items.emplace_back(highlight_field_name, fully_highlighted, infixed);
        }
    }

@@ -2047,6 +2099,8 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
                                  const std::vector<std::string>& q_tokens,
                                  const KV* field_order_kv, const nlohmann::json & document,
                                  nlohmann::json& highlight_doc,
                                  nlohmann::json& highlight_full_doc,
                                  nlohmann::json& highlight_meta,
                                  StringUtils & string_utils,
                                  const size_t snippet_threshold,
                                  const size_t highlight_affix_num_tokens,
@@ -2055,12 +2109,16 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
                                  const std::string& highlight_start_tag,
                                  const std::string& highlight_end_tag,
                                  const uint8_t* index_symbols,
                                  highlight_t& highlight) const {
                                  highlight_t& highlight,
                                  bool& found_highlight,
                                  bool& found_full_highlight) const {

    if(q_tokens.size() == 1 && q_tokens[0] == "*") {
        return;
    }

    tsl::htrie_set<char> matched_tokens;

    bool is_cyrillic = Tokenizer::is_cyrillic(search_field.locale);
    bool normalise = is_cyrillic ? false : true;

@@ -2123,15 +2181,18 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
        std::vector<std::string> path_parts;
        StringUtils::split(search_field.name, path_parts, ".");

        highlight_nested_field(highlight_doc, highlight_doc, path_parts, 0, [&](nlohmann::json& str_obj) {
        highlight_nested_field(highlight_doc, highlight_doc, highlight_full_doc, highlight_full_doc,
                               path_parts, 0, [&](nlohmann::json& h_obj, nlohmann::json& f_obj) {
            Match match;
            match_index_t match_index(match, 0, 0);
            int last_valid_offset_index = -1;
            size_t last_valid_offset = 0;

            std::string text = str_obj.get<std::string>();
            bool found_higlight = handle_highlight_text(text, normalise, search_field, symbols_to_index,
                                                        token_separators, highlight, string_utils, is_cyrillic,
            highlight_t array_highlight = highlight;

            std::string text = h_obj.get<std::string>();
            handle_highlight_text(text, normalise, search_field, symbols_to_index,
                                  token_separators, array_highlight, string_utils, is_cyrillic,
                                  highlight_affix_num_tokens,
                                  qtoken_leaves, last_valid_offset_index, match,
                                  last_raw_q_token,
@@ -2139,11 +2200,30 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
                                  raw_query_tokens,
                                  last_valid_offset, highlight_start_tag, highlight_end_tag,
                                  index_symbols, match_index);
            if(!highlight.snippets.empty()) {
                str_obj = highlight.snippets[0];

            if(!array_highlight.snippets.empty()) {
                h_obj = array_highlight.snippets[0];
                found_highlight = found_highlight || true;
                for(auto& token_vec: array_highlight.matched_tokens) {
                    for(auto& token: token_vec) {
                        matched_tokens.insert(token);
                    }
                }
            }

            if(highlight_fully && !array_highlight.values.empty()) {
                f_obj = array_highlight.values[0];
                found_full_highlight = found_full_highlight || true;
            }
        });

        if(found_highlight) {
            highlight_meta[search_field.name] = nlohmann::json::object();
            for(auto it = matched_tokens.begin(); it != matched_tokens.end(); ++it) {
                highlight_meta[search_field.name]["matched_tokens"].push_back(it.key());
            }
        }

        return;
    }

@@ -2228,6 +2308,18 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
                              last_valid_offset, highlight_start_tag, highlight_end_tag,
                              index_symbols, match_index);

        if(!highlight.snippets.empty()) {
            found_highlight = found_highlight || true;
            for(auto& token_vec: highlight.matched_tokens) {
                for(auto& token: token_vec) {
                    matched_tokens.insert(token);
                }
            }
        }

        if(!highlight.values.empty()) {
            found_full_highlight = found_full_highlight || true;
        }
    }

    highlight.field = search_field.name;
@@ -2237,27 +2329,55 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
        highlight.match_score = match_indices[0].match_score;
    }

    if(search_field.nested) {
        std::vector<std::string> parts;
        StringUtils::split(search_field.name, parts, ".");
        nlohmann::json* val = highlight_doc.contains(parts[0]) ? &highlight_doc[parts[0]] : nullptr;
        // in-place highlighting under the new highlight structure
        std::vector<std::string> parts;
        StringUtils::split(search_field.name, parts, ".");
        nlohmann::json* hval = highlight_doc.contains(parts[0]) ? &highlight_doc[parts[0]] : nullptr;
        nlohmann::json* fval = highlight_full_doc.contains(parts[0]) ? &highlight_full_doc[parts[0]] : nullptr;

        for(size_t i = 1; val != nullptr && i < parts.size(); i++) {
            const auto& part = parts[i];
            if(val->contains(part)) {
                val = &val->at(part);
            } else {
                val = nullptr;
        for(size_t i = 1; hval != nullptr && i < parts.size(); i++) {
            const auto& part = parts[i];
            if(hval->contains(part)) {
                hval = &hval->at(part);
            } else {
                hval = nullptr;
            }
        }

        for(size_t i = 1; fval != nullptr && i < parts.size(); i++) {
            const auto& part = parts[i];
            if(fval->contains(part)) {
                fval = &fval->at(part);
            } else {
                fval = nullptr;
            }
        }

        if(hval) {
            if(highlight.indices.empty()) {
                *hval = highlight.snippets[0];
            } else {
                if(hval->is_array()) {
                    for(size_t hi = 0; hi < highlight.indices.size(); hi++) {
                        hval->at(highlight.indices[hi]) = highlight.snippets[hi];
                    }
                }
            }

        if(val) {
            highlight_meta[search_field.name] = nlohmann::json::object();
            for(auto it = matched_tokens.begin(); it != matched_tokens.end(); ++it) {
                highlight_meta[search_field.name]["matched_tokens"].push_back(it.key());
            }
        }

        if(fval) {
            if(!highlight.values.empty()) {
                if(highlight.indices.empty()) {
                    *val = highlight.snippets[0];
                    *fval = highlight.values[0];
                } else {
                    if(val->is_array()) {
                        for(size_t hi = 0; hi < highlight.indices.size(); hi++) {
                            val->at(highlight.indices[hi]) = highlight.snippets[hi];
                    if(fval->is_array()) {
                        for(size_t hi = 0; hi < highlight.values.size(); hi++) {
                            fval->at(highlight.indices[hi]) = highlight.values[hi];
                        }
                    }
                }
@@ -326,45 +326,27 @@ TEST_F(CollectionNestedFieldsTest, SearchOnFieldsOnWildcardSchema) {
        "locations":[
            {
                "address":{
                    "city":"Beaverton",
                    "products":[
                        "shoes",
                        "tshirts"
                    ],
                    "street":"One Bowerman Drive"
                },
                "country":"USA"
            }
            },
            {
                "address":{
                    "city":"Thornhill",
                    "products":[
                        "sneakers",
                        "shoes"
                    ],
                    "street":"175 <mark>Commerce</mark> Valley"
                },
                "country":"Canada"
            }
        }
        ]
    })"_json;

    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"].dump());
    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
    ASSERT_EQ(0, results["hits"][0]["highlights"].size());

    // search specific nested fields
    // search specific nested fields, only matching field is highlighted by default
    results = coll1->search("one shoe", {"locations.address.street", "employees.tags"}, "", {}, sort_fields,
                            {0}, 10, 1, FREQUENCY, {true}).get();
    ASSERT_EQ(1, results["hits"].size());
    ASSERT_EQ(doc, results["hits"][0]["document"]);

    highlight_doc = R"({
        "employees":{
            "tags":[
                "senior plumber",
                "electrician"
            ]
        },
        "locations":[
            {
                "address":{
@@ -373,13 +355,13 @@ TEST_F(CollectionNestedFieldsTest, SearchOnFieldsOnWildcardSchema) {
            },
            {
                "address":{
                    "street":"<mark>One</mark> Bowerman Drive"
                    "street":"175 Commerce Valley"
                }
            }
        ]
    })"_json;

    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"].dump());
    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
    ASSERT_EQ(0, results["hits"][0]["highlights"].size());

    // try to search nested fields that don't exist
@@ -451,6 +433,301 @@ TEST_F(CollectionNestedFieldsTest, IncludeExcludeFields) {
    ASSERT_EQ(R"({"locations":[{"address":{"products":["shoes","tshirts"]}},{"address":{"products":["sneakers","shoes"]}}]})", doc.dump());
}

TEST_F(CollectionNestedFieldsTest, HighlightNestedFieldFully) {
    std::vector<field> fields = {field(".*", field_types::AUTO, false, true)};

    auto op = collectionManager.create_collection("coll1", 1, fields, "", 0, field_types::AUTO);
    ASSERT_TRUE(op.ok());
    Collection* coll1 = op.get();

    auto doc = R"({
        "company_names": ["Space Corp. LLC", "Drive One Inc."],
        "company": {"names": ["Space Corp. LLC", "Drive One Inc."]},
        "locations": [
            { "pincode": 100, "country": "USA",
              "address": { "street": "One Bowerman Drive", "city": "Beaverton", "products": ["shoes", "tshirts"] }
            },
            { "pincode": 200, "country": "Canada",
              "address": { "street": "175 Commerce Drive", "city": "Thornhill", "products": ["sneakers", "shoes"] }
            }
        ]
    })"_json;

    auto add_op = coll1->add(doc.dump(), CREATE);
    ASSERT_TRUE(add_op.ok());

    // search both simply nested and deeply nested array-of-objects
    auto results = coll1->search("One", {"locations.address"}, "", {}, sort_fields, {0}, 10, 1,
                                 token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address").get();

    ASSERT_EQ(1, results["hits"].size());

    auto highlight_doc = R"({
        "locations":[
            {
                "address":{
                    "street":"<mark>One</mark> Bowerman Drive"
                }
            },
            {
                "address":{
                    "street":"175 Commerce Drive"
                }
            }
        ]
    })"_json;

    auto highlight_full_doc = R"({
        "locations":[
            {
                "address":{
                    "city":"Beaverton",
                    "products":[
                        "shoes",
                        "tshirts"
                    ],
                    "street":"<mark>One</mark> Bowerman Drive"
                }
            },
            {
                "address":{
                    "city":"Thornhill",
                    "products":[
                        "sneakers",
                        "shoes"
                    ],
                    "street":"175 Commerce Drive"
                }
            }
        ]
    })"_json;

    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
    ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["full"].dump());
    ASSERT_EQ(0, results["hits"][0]["highlights"].size());

    // repeating token

    results = coll1->search("drive", {"locations.address"}, "", {}, sort_fields, {0}, 10, 1,
                            token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address").get();

    ASSERT_EQ(1, results["hits"].size());

    highlight_doc = R"({
        "locations":[
            {
                "address":{
                    "street":"One Bowerman <mark>Drive</mark>"
                }
            },
            {
                "address":{
                    "street":"175 Commerce <mark>Drive</mark>"
                }
            }
        ]
    })"_json;

    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
    ASSERT_EQ(0, results["hits"][0]["highlights"].size());

    // nested array of array, highlighting parent of searched nested field
    results = coll1->search("shoes", {"locations.address.products"}, "", {}, sort_fields, {0}, 10, 1,
                            token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address",
                            20, {}, {}, {}, 0, "<mark>", "</mark>", {}, 1000, true, false, true,
                            "locations.address").get();

    ASSERT_EQ(1, results["hits"].size());
    highlight_full_doc = R"({
        "locations":[
            {
                "address":{
                    "city":"Beaverton",
                    "products":[
                        "<mark>shoes</mark>",
                        "tshirts"
                    ],
                    "street":"One Bowerman Drive"
                }
            },
            {
                "address":{
                    "city":"Thornhill",
                    "products":[
                        "sneakers",
                        "<mark>shoes</mark>"
                    ],
                    "street":"175 Commerce Drive"
                }
            }
        ]
    })"_json;

    ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["full"].dump());
    ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());

    // full highlighting only one of the 3 highlight fields
    results = coll1->search("drive", {"company.names", "company_names", "locations.address"}, "", {}, sort_fields, {0}, 10, 1,
                            token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address",
                            20, {}, {}, {}, 0, "<mark>", "</mark>", {}, 1000, true, false, true,
                            "company.names,company_names,locations.address").get();

    highlight_full_doc = R"({
        "locations":[
            {
                "address":{
                    "city":"Beaverton",
                    "products":[
                        "shoes",
                        "tshirts"
                    ],
                    "street":"One Bowerman <mark>Drive</mark>"
                }
            },
            {
                "address":{
                    "city":"Thornhill",
                    "products":[
                        "sneakers",
                        "shoes"
                    ],
                    "street":"175 Commerce <mark>Drive</mark>"
                }
            }
        ]
    })"_json;

    highlight_doc = R"({
        "company":{
            "names": ["Space Corp. LLC", "<mark>Drive</mark> One Inc."]
        },
        "company_names": ["Space Corp. LLC", "<mark>Drive</mark> One Inc."],
        "locations":[
            {
                "address":{
                    "city":"Beaverton",
                    "products":[
                        "shoes",
                        "tshirts"
                    ],
                    "street":"One Bowerman <mark>Drive</mark>"
                }
            },
            {
                "address":{
                    "city":"Thornhill",
                    "products":[
                        "sneakers",
                        "shoes"
                    ],
                    "street":"175 Commerce <mark>Drive</mark>"
                }
            }
        ]
    })"_json;

    ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["full"].dump());
    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());

    // if highlight fields not provided, only matching sub-fields should appear in highlight

    results = coll1->search("space", {"company.names", "company_names", "locations.address"}, "", {}, sort_fields, {0}, 10, 1,
                            token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4).get();

    highlight_doc = R"({
        "company":{
            "names": ["<mark>Space</mark> Corp. LLC", "Drive One Inc."]
        },
        "company_names": ["<mark>Space</mark> Corp. LLC", "Drive One Inc."]
    })"_json;

    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
    ASSERT_EQ(0, results["hits"][0]["highlight"]["full"].size());

    // only a single highlight full field provided

    results = coll1->search("space", {"company.names", "company_names", "locations.address"}, "", {}, sort_fields, {0}, 10, 1,
                            token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "company.names").get();

    highlight_full_doc = R"({
        "company":{
            "names":[
                "<mark>Space</mark> Corp. LLC",
                "Drive One Inc."
            ]
        }
    })"_json;

    highlight_doc = R"({
        "company":{
            "names":[
                "<mark>Space</mark> Corp. LLC",
                "Drive One Inc."
            ]
        },
        "company_names":[
            "<mark>Space</mark> Corp. LLC",
            "Drive One Inc."
        ]
    })"_json;

    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
    ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["full"].dump());
}

TEST_F(CollectionNestedFieldsTest, HighlightShouldHaveMeta) {
    std::vector<field> fields = {field(".*", field_types::AUTO, false, true)};

    auto op = collectionManager.create_collection("coll1", 1, fields, "", 0, field_types::AUTO);
    ASSERT_TRUE(op.ok());
    Collection* coll1 = op.get();

    auto doc = R"({
        "company_names": ["Quick brown fox jumped.", "The red fox was not fast."],
        "details": {
            "description": "Quick set, go.",
            "names": ["Quick brown fox jumped.", "The red fox was not fast."]
        },
        "locations": [
            {
                "address": { "street": "Brown Shade Avenue" }
            },
            {
                "address": { "street": "Graywolf Lane" }
            }
        ]
    })"_json;

    auto add_op = coll1->add(doc.dump(), CREATE);
    ASSERT_TRUE(add_op.ok());

    // search both simply nested and deeply nested array-of-objects
    auto results = coll1->search("brown fox", {"company_names", "details", "locations"},
                                 "", {}, sort_fields, {0}, 10, 1,
                                 token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address").get();

    ASSERT_EQ(3, results["hits"][0]["highlight"]["meta"].size());
    ASSERT_EQ(1, results["hits"][0]["highlight"]["meta"]["company_names"].size());

    ASSERT_EQ(2, results["hits"][0]["highlight"]["meta"]["company_names"]["matched_tokens"].size());
    ASSERT_EQ("brown", results["hits"][0]["highlight"]["meta"]["company_names"]["matched_tokens"][0]);
    ASSERT_EQ("fox", results["hits"][0]["highlight"]["meta"]["company_names"]["matched_tokens"][1]);

    ASSERT_EQ(2, results["hits"][0]["highlight"]["meta"]["details.names"]["matched_tokens"].size());
    ASSERT_EQ("brown", results["hits"][0]["highlight"]["meta"]["details.names"]["matched_tokens"][0]);
    ASSERT_EQ("fox", results["hits"][0]["highlight"]["meta"]["details.names"]["matched_tokens"][1]);

    ASSERT_EQ(1, results["hits"][0]["highlight"]["meta"]["locations.address.street"]["matched_tokens"].size());
    ASSERT_EQ("Brown", results["hits"][0]["highlight"]["meta"]["locations.address.street"]["matched_tokens"][0]);
}

TEST_F(CollectionNestedFieldsTest, GroupByOnNestedFieldsWithWildcardSchema) {
    std::vector<field> fields = {field(".*", field_types::AUTO, false, true),
                                 field("education.name", field_types::STRING_ARRAY, true, true),