Split highlight into meta/snippet/full.

2025-05-18 12:42:50 +08:00 · 2022-07-27 17:47:21 +05:30 · 2022-07-27 17:47:21 +05:30 · 5a220e7398
commit 5a220e7398
parent 0399c1ac72
3 changed files with 488 additions and 84 deletions
--- a/include/collection.h
+++ b/include/collection.h
@ -124,13 +124,15 @@ private:

    std::string get_seq_id_key(uint32_t seq_id) const;

-    void highlight_result(const std::string& raw_query,
+    void highlight_result(const std::string& h_obj,
                          const field &search_field,
                          const size_t search_field_index,
                          const tsl::htrie_map<char, token_leaf>& qtoken_leaves,
                          const std::vector<std::string>& q_tokens,
                          const KV* field_order_kv, const nlohmann::json &document,
                          nlohmann::json& highlight_doc,
+                          nlohmann::json& highlight_full_doc,
+                          nlohmann::json& highlight_meta,
                          StringUtils & string_utils,
                          const size_t snippet_threshold,
                          const size_t highlight_affix_num_tokens,
@ -139,7 +141,9 @@ private:
                          const std::string& highlight_start_tag,
                          const std::string& highlight_end_tag,
                          const uint8_t* index_symbols,
-                          highlight_t &highlight) const;
+                          highlight_t &highlight,
+                          bool& found_highlight,
+                          bool& found_full_highlight) const;

    void remove_document(const nlohmann::json & document, const uint32_t seq_id, bool remove_from_store);

@ -437,8 +441,8 @@ public:
    void process_highlight_fields(const std::vector<std::string>& search_fields,
                                  const tsl::htrie_set<char>& exclude_fields,
                                  const tsl::htrie_set<char>& include_fields,
-                                  const string& highlight_fields,
-                                  const std::string& highlight_full_fields,
+                                  const std::vector<std::string>& highlight_field_names,
+                                  const std::vector<std::string>& highlight_full_field_names,
                                  const std::vector<enable_t>& infixes,
                                  std::vector<std::string>& q_tokens,
                                  const tsl::htrie_map<char, token_leaf>& qtoken_set,
@ -454,29 +458,32 @@ public:
 };

 template<class T>
-bool highlight_nested_field(const nlohmann::json& doc, nlohmann::json& obj,
+bool highlight_nested_field(const nlohmann::json& hdoc, nlohmann::json& hobj,
+                            const nlohmann::json& fdoc, nlohmann::json& fobj,
                            std::vector<std::string>& path_parts, size_t path_index, T func) {
    if(path_index == path_parts.size()) {
        // end of path: guaranteed to be a string
-        if(!obj.is_string()) {
+        if(!hobj.is_string()) {
            return false;
        }

-        func(obj);
+        func(hobj, fobj);
    }

    const std::string& fragment = path_parts[path_index];
-    const auto& it = obj.find(fragment);
+    const auto& it = hobj.find(fragment);

-    if(it != obj.end()) {
+    if(it != hobj.end()) {
        if(it.value().is_array()) {
            bool resolved = false;
-            for(auto& ele: it.value()) {
-                resolved |= highlight_nested_field(doc, ele, path_parts, path_index + 1, func);
+            for(size_t i = 0; i < it.value().size(); i++) {
+                auto& h_ele = it.value().at(i);
+                auto& f_ele = fobj[fragment][i];
+                resolved |= highlight_nested_field(hdoc, h_ele, fdoc, f_ele, path_parts, path_index + 1, func);
            }
            return resolved;
        } else {
-            return highlight_nested_field(doc, it.value(), path_parts, path_index + 1, func);
+            return highlight_nested_field(hdoc, it.value(), fdoc, fobj[fragment], path_parts, path_index + 1, func);
        }
    } {
        return false;
--- a/src/collection.cpp
+++ b/src/collection.cpp
@ -1269,15 +1269,23 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
    // handle which fields have to be highlighted

    std::vector<highlight_field_t> highlight_items;
-    tsl::htrie_set<char> hfield_names;
+    bool has_atleast_one_fully_highlighted_field = false;
+
+    std::vector<std::string> highlight_field_names;
+    StringUtils::split(highlight_fields, highlight_field_names, ",");
+
+    std::vector<std::string> highlight_full_field_names;
+    StringUtils::split(highlight_full_fields, highlight_full_field_names, ",");

    if(query != "*") {
-        process_highlight_fields(search_fields, include_fields_full, exclude_fields_full, highlight_fields,
-                                 highlight_full_fields, infixes, q_tokens, search_params->qtoken_set,
-                                 highlight_items);
+        process_highlight_fields(search_fields, include_fields_full, exclude_fields_full,
+                                 highlight_field_names, highlight_full_field_names, infixes, q_tokens,
+                                 search_params->qtoken_set, highlight_items);

        for(auto& highlight_item: highlight_items) {
-            hfield_names.insert(highlight_item.name);
+            if(highlight_item.fully_highlighted) {
+                has_atleast_one_fully_highlighted_field = true;
+            }
        }
    }

@ -1320,12 +1328,22 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
                continue;
            }

-            nlohmann::json highlight_doc;
+            nlohmann::json highlight_res;

            if(!highlight_items.empty()) {
-                highlight_doc = document;
-                remove_flat_fields(highlight_doc);
-                highlight_doc.erase("id");
+                highlight_res["meta"] = nlohmann::json::object();
+
+                highlight_res["snippet"] = document;
+                remove_flat_fields(highlight_res["snippet"]);
+                highlight_res["snippet"].erase("id");
+
+                if(has_atleast_one_fully_highlighted_field) {
+                    highlight_res["full"] = document;
+                    remove_flat_fields(highlight_res["full"]);
+                    highlight_res["full"].erase("id");
+                } else {
+                    highlight_res["full"] = nlohmann::json::object();
+                }
            }

            nlohmann::json wrapper_doc;
@ -1333,6 +1351,9 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
            std::vector<highlight_t> highlights;
            StringUtils string_utils;

+            tsl::htrie_set<char> hfield_names;
+            tsl::htrie_set<char> h_full_field_names;
+
            for(size_t i = 0; i < highlight_items.size(); i++) {
                auto& highlight_item = highlight_items[i];
                const std::string& field_name = highlight_item.name;
@ -1347,18 +1368,48 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,

                    highlight_t highlight;
                    highlight.field = search_field.name;
+
+                    bool found_highlight = false;
+                    bool found_full_highlight = false;
+
                    highlight_result(raw_query, search_field, i, highlight_item.qtoken_leaves, q_tokens, field_order_kv,
-                                     document, highlight_doc, string_utils, snippet_threshold,
+                                     document, highlight_res["snippet"], highlight_res["full"], highlight_res["meta"],
+                                     string_utils, snippet_threshold,
                                     highlight_affix_num_tokens, highlight_item.fully_highlighted, highlight_item.infix,
-                                     highlight_start_tag, highlight_end_tag, index_symbols, highlight);
+                                     highlight_start_tag, highlight_end_tag, index_symbols, highlight,
+                                     found_highlight, found_full_highlight);
                    if(!highlight.snippets.empty()) {
                        highlights.push_back(highlight);
                    }
+
+                    if(found_highlight) {
+                        hfield_names.insert(search_field.name);
+                        if(found_full_highlight) {
+                            h_full_field_names.insert(search_field.name);
+                        }
+                    }
+                }
+            }
+
+            // explicit highlight fields could be parent of searched fields, so we will take a pass at that
+            for(auto& hfield_name: highlight_full_field_names) {
+                auto it = h_full_field_names.equal_prefix_range(hfield_name);
+                if(it.first != it.second) {
+                    h_full_field_names.insert(hfield_name);
+                }
+            }
+
+            for(auto& hfield_name: highlight_field_names) {
+                auto it = hfield_names.equal_prefix_range(hfield_name);
+                if(it.first != it.second) {
+                    hfield_names.insert(hfield_name);
                }
            }

            // remove fields from highlight doc that were not highlighted
-            prune_doc(highlight_doc, hfield_names, tsl::htrie_set<char>(), "");
+            prune_doc(highlight_res["snippet"], hfield_names, tsl::htrie_set<char>(), "");
+            prune_doc(highlight_res["full"], h_full_field_names, tsl::htrie_set<char>(), "");
+
            std::sort(highlights.begin(), highlights.end());

            for(const auto & highlight: highlights) {
@ -1403,7 +1454,7 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
            remove_flat_fields(document);

            wrapper_doc["document"] = document;
-            wrapper_doc["highlight"] = highlight_doc;
+            wrapper_doc["highlight"] = highlight_res;

            if(field_order_kv->match_score_index == CURATED_RECORD_IDENTIFIER) {
                wrapper_doc["curated"] = true;
@ -1698,8 +1749,8 @@ void Collection::populate_text_match_info(nlohmann::json& info, uint64_t match_s
 void Collection::process_highlight_fields(const std::vector<std::string>& search_fields,
                                          const tsl::htrie_set<char>& include_fields,
                                          const tsl::htrie_set<char>& exclude_fields,
-                                          const string& highlight_fields,
-                                          const std::string& highlight_full_fields,
+                                          const std::vector<std::string>& highlight_field_names,
+                                          const std::vector<std::string>& highlight_full_field_names,
                                          const std::vector<enable_t>& infixes,
                                          std::vector<std::string>& q_tokens,
                                          const tsl::htrie_map<char, token_leaf>& qtoken_set,
@ -1707,10 +1758,8 @@ void Collection::process_highlight_fields(const std::vector<std::string>& search

    // identify full highlight fields
    spp::sparse_hash_set<std::string> fields_highlighted_fully_set;
-    std::vector<std::string> fields_highlighted_fully_vec;
-    StringUtils::split(highlight_full_fields, fields_highlighted_fully_vec, ",");
    std::vector<std::string> fields_highlighted_fully_expanded;
-    for(std::string & highlight_full_field: fields_highlighted_fully_vec) {
+    for(const std::string& highlight_full_field: highlight_full_field_names) {
        extract_field_name(highlight_full_field, search_schema, fields_highlighted_fully_expanded);
    }

@ -1730,7 +1779,7 @@ void Collection::process_highlight_fields(const std::vector<std::string>& search
        }
    }

-    if(highlight_fields.empty()) {
+    if(highlight_field_names.empty()) {
        for(size_t i = 0; i < search_fields.size(); i++) {
            const auto& field_name = search_fields[i];
            if(exclude_fields.count(field_name) != 0) {
@ -1748,17 +1797,20 @@ void Collection::process_highlight_fields(const std::vector<std::string>& search
            highlight_items.emplace_back(field_name, fully_highlighted, infixed);
        }
    } else {
-        std::vector<std::string> highlight_field_names;
-        StringUtils::split(highlight_fields, highlight_field_names, ",");
-
+        std::vector<std::string> highlight_field_names_expanded;
        for(size_t i = 0; i < highlight_field_names.size(); i++) {
-            if(search_schema.count(highlight_field_names[i]) == 0) {
+            extract_field_name(highlight_field_names[i], search_schema, highlight_field_names_expanded);
+        }
+
+        for(size_t i = 0; i < highlight_field_names_expanded.size(); i++) {
+            const auto& highlight_field_name = highlight_field_names_expanded[i];
+            if(search_schema.count(highlight_field_name) == 0) {
                // ignore fields not part of schema
                continue;
            }
-            bool fully_highlighted = (fields_highlighted_fully_set.count(highlight_field_names[i]) != 0);
-            bool infixed = (fields_infixed_set.count(highlight_field_names[i]) != 0);
-            highlight_items.emplace_back(highlight_field_names[i], fully_highlighted, infixed);
+            bool fully_highlighted = (fields_highlighted_fully_set.count(highlight_field_name) != 0);
+            bool infixed = (fields_infixed_set.count(highlight_field_name) != 0);
+            highlight_items.emplace_back(highlight_field_name, fully_highlighted, infixed);
        }
    }

@ -2047,6 +2099,8 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
                                  const std::vector<std::string>& q_tokens,
                                  const KV* field_order_kv, const nlohmann::json & document,
                                  nlohmann::json& highlight_doc,
+                                  nlohmann::json& highlight_full_doc,
+                                  nlohmann::json& highlight_meta,
                                  StringUtils & string_utils,
                                  const size_t snippet_threshold,
                                  const size_t highlight_affix_num_tokens,
@ -2055,12 +2109,16 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
                                  const std::string& highlight_start_tag,
                                  const std::string& highlight_end_tag,
                                  const uint8_t* index_symbols,
-                                  highlight_t& highlight) const {
+                                  highlight_t& highlight,
+                                  bool& found_highlight,
+                                  bool& found_full_highlight) const {

    if(q_tokens.size() == 1 && q_tokens[0] == "*") {
        return;
    }

+    tsl::htrie_set<char> matched_tokens;
+
    bool is_cyrillic = Tokenizer::is_cyrillic(search_field.locale);
    bool normalise = is_cyrillic ? false : true;

@ -2123,15 +2181,18 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
            std::vector<std::string> path_parts;
            StringUtils::split(search_field.name, path_parts, ".");

-            highlight_nested_field(highlight_doc, highlight_doc, path_parts, 0, [&](nlohmann::json& str_obj) {
+            highlight_nested_field(highlight_doc, highlight_doc, highlight_full_doc, highlight_full_doc,
+                                   path_parts, 0, [&](nlohmann::json& h_obj, nlohmann::json& f_obj) {
                Match match;
                match_index_t match_index(match, 0, 0);
                int last_valid_offset_index = -1;
                size_t last_valid_offset = 0;

-                std::string text = str_obj.get<std::string>();
-                bool found_higlight = handle_highlight_text(text, normalise, search_field, symbols_to_index,
-                                                            token_separators, highlight, string_utils, is_cyrillic,
+                highlight_t array_highlight = highlight;
+
+                std::string text = h_obj.get<std::string>();
+                handle_highlight_text(text, normalise, search_field, symbols_to_index,
+                                                            token_separators, array_highlight, string_utils, is_cyrillic,
                                                            highlight_affix_num_tokens,
                                                            qtoken_leaves, last_valid_offset_index, match,
                                                            last_raw_q_token,
@ -2139,11 +2200,30 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
                                                            raw_query_tokens,
                                                            last_valid_offset, highlight_start_tag, highlight_end_tag,
                                                            index_symbols, match_index);
-                if(!highlight.snippets.empty()) {
-                    str_obj = highlight.snippets[0];
+
+                if(!array_highlight.snippets.empty()) {
+                    h_obj = array_highlight.snippets[0];
+                    found_highlight = found_highlight || true;
+                    for(auto& token_vec: array_highlight.matched_tokens) {
+                        for(auto& token: token_vec) {
+                            matched_tokens.insert(token);
+                        }
+                    }
+                }
+
+                if(highlight_fully && !array_highlight.values.empty()) {
+                    f_obj = array_highlight.values[0];;
+                    found_full_highlight = found_full_highlight || true;
                }
            });

+            if(found_highlight) {
+                highlight_meta[search_field.name] = nlohmann::json::object();
+                for(auto it = matched_tokens.begin(); it != matched_tokens.end(); ++it) {
+                    highlight_meta[search_field.name]["matched_tokens"].push_back(it.key());
+                }
+            }
+
            return ;
        }

@ -2228,6 +2308,18 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
                              last_valid_offset, highlight_start_tag, highlight_end_tag,
                              index_symbols, match_index);

+        if(!highlight.snippets.empty()) {
+            found_highlight = found_highlight || true;
+            for(auto& token_vec: highlight.matched_tokens) {
+                for(auto& token: token_vec) {
+                    matched_tokens.insert(token);
+                }
+            }
+        }
+
+        if(!highlight.values.empty()) {
+            found_full_highlight = found_full_highlight || true;
+        }
    }

    highlight.field = search_field.name;
@ -2237,27 +2329,55 @@ void Collection::highlight_result(const std::string& raw_query, const field &sea
        highlight.match_score = match_indices[0].match_score;
    }

-    if(search_field.nested) {
-        std::vector<std::string> parts;
-        StringUtils::split(search_field.name, parts, ".");
-        nlohmann::json* val = highlight_doc.contains(parts[0]) ? &highlight_doc[parts[0]] : nullptr;
+    // in-place highlighting under the new highlight structure
+    std::vector<std::string> parts;
+    StringUtils::split(search_field.name, parts, ".");
+    nlohmann::json* hval = highlight_doc.contains(parts[0]) ? &highlight_doc[parts[0]] : nullptr;
+    nlohmann::json* fval = highlight_full_doc.contains(parts[0]) ? &highlight_full_doc[parts[0]] : nullptr;

-        for(size_t i = 1; val != nullptr && i < parts.size(); i++) {
-            const auto& part = parts[i];
-            if(val->contains(part)) {
-                val = &val->at(part);
-            } else {
-                val = nullptr;
+    for(size_t i = 1; hval != nullptr && i < parts.size(); i++) {
+        const auto& part = parts[i];
+        if(hval->contains(part)) {
+            hval = &hval->at(part);
+        } else {
+            hval = nullptr;
+        }
+    }
+
+    for(size_t i = 1; fval != nullptr && i < parts.size(); i++) {
+        const auto& part = parts[i];
+        if(fval->contains(part)) {
+            fval = &fval->at(part);
+        } else {
+            fval = nullptr;
+        }
+    }
+
+    if(hval) {
+        if(highlight.indices.empty()) {
+            *hval = highlight.snippets[0];
+        } else {
+            if(hval->is_array()) {
+                for(size_t hi = 0; hi < highlight.indices.size(); hi++) {
+                    hval->at(highlight.indices[hi]) = highlight.snippets[hi];
+                }
            }
        }

-        if(val) {
+        highlight_meta[search_field.name] = nlohmann::json::object();
+        for(auto it = matched_tokens.begin(); it != matched_tokens.end(); ++it) {
+            highlight_meta[search_field.name]["matched_tokens"].push_back(it.key());
+        }
+    }
+
+    if(fval) {
+        if(!highlight.values.empty()) {
            if(highlight.indices.empty()) {
-                *val = highlight.snippets[0];
+                *fval = highlight.values[0];
            } else {
-                if(val->is_array()) {
-                    for(size_t hi = 0; hi < highlight.indices.size(); hi++) {
-                        val->at(highlight.indices[hi]) = highlight.snippets[hi];
+                if(fval->is_array()) {
+                    for(size_t hi = 0; hi < highlight.values.size(); hi++) {
+                        fval->at(highlight.indices[hi]) = highlight.values[hi];
                    }
                }
            }
--- a/test/collection_nested_fields_test.cpp
+++ b/test/collection_nested_fields_test.cpp
@ -326,45 +326,27 @@ TEST_F(CollectionNestedFieldsTest, SearchOnFieldsOnWildcardSchema) {
      "locations":[
        {
          "address":{
-            "city":"Beaverton",
-            "products":[
-              "shoes",
-              "tshirts"
-            ],
            "street":"One Bowerman Drive"
-          },
-          "country":"USA"
+          }
        },
        {
          "address":{
-            "city":"Thornhill",
-            "products":[
-              "sneakers",
-              "shoes"
-            ],
            "street":"175 <mark>Commerce</mark> Valley"
-          },
-          "country":"Canada"
+          }
        }
      ]
    })"_json;

-    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"].dump());
+    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
    ASSERT_EQ(0, results["hits"][0]["highlights"].size());

-    // search specific nested fields
+    // search specific nested fields, only matching field is highlighted by default
    results = coll1->search("one shoe", {"locations.address.street", "employees.tags"}, "", {}, sort_fields,
                            {0}, 10, 1, FREQUENCY, {true}).get();
    ASSERT_EQ(1, results["hits"].size());
    ASSERT_EQ(doc, results["hits"][0]["document"]);

    highlight_doc = R"({
-      "employees":{
-        "tags":[
-          "senior plumber",
-          "electrician"
-        ]
-      },
      "locations":[
        {
          "address":{
@ -373,13 +355,13 @@ TEST_F(CollectionNestedFieldsTest, SearchOnFieldsOnWildcardSchema) {
        },
        {
          "address":{
-            "street":"<mark>One</mark> Bowerman Drive"
+            "street":"175 Commerce Valley"
          }
        }
      ]
    })"_json;

-    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"].dump());
+    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
    ASSERT_EQ(0, results["hits"][0]["highlights"].size());

    // try to search nested fields that don't exist
@ -451,6 +433,301 @@ TEST_F(CollectionNestedFieldsTest, IncludeExcludeFields) {
    ASSERT_EQ(R"({"locations":[{"address":{"products":["shoes","tshirts"]}},{"address":{"products":["sneakers","shoes"]}}]})", doc.dump());
 }

+TEST_F(CollectionNestedFieldsTest, HighlightNestedFieldFully) {
+    std::vector<field> fields = {field(".*", field_types::AUTO, false, true)};
+
+    auto op = collectionManager.create_collection("coll1", 1, fields, "", 0, field_types::AUTO);
+    ASSERT_TRUE(op.ok());
+    Collection* coll1 = op.get();
+
+    auto doc = R"({
+        "company_names": ["Space Corp. LLC", "Drive One Inc."],
+        "company": {"names": ["Space Corp. LLC", "Drive One Inc."]},
+        "locations": [
+            { "pincode": 100, "country": "USA",
+              "address": { "street": "One Bowerman Drive", "city": "Beaverton", "products": ["shoes", "tshirts"] }
+            },
+            { "pincode": 200, "country": "Canada",
+              "address": { "street": "175 Commerce Drive", "city": "Thornhill", "products": ["sneakers", "shoes"] }
+            }
+        ]
+    })"_json;
+
+    auto add_op = coll1->add(doc.dump(), CREATE);
+    ASSERT_TRUE(add_op.ok());
+
+    // search both simply nested and deeply nested array-of-objects
+    auto results = coll1->search("One", {"locations.address"}, "", {}, sort_fields, {0}, 10, 1,
+                                 token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address").get();
+
+    ASSERT_EQ(1, results["hits"].size());
+
+    auto highlight_doc = R"({
+      "locations":[
+        {
+          "address":{
+            "street":"<mark>One</mark> Bowerman Drive"
+          }
+        },
+        {
+          "address":{
+            "street":"175 Commerce Drive"
+          }
+        }
+      ]
+    })"_json;
+
+    auto highlight_full_doc = R"({
+        "locations":[
+          {
+            "address":{
+              "city":"Beaverton",
+              "products":[
+                "shoes",
+                "tshirts"
+              ],
+              "street":"<mark>One</mark> Bowerman Drive"
+            }
+          },
+          {
+            "address":{
+              "city":"Thornhill",
+              "products":[
+                "sneakers",
+                "shoes"
+              ],
+              "street":"175 Commerce Drive"
+            }
+          }
+        ]
+    })"_json;
+
+    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
+    ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["full"].dump());
+    ASSERT_EQ(0, results["hits"][0]["highlights"].size());
+
+    // repeating token
+
+    results = coll1->search("drive", {"locations.address"}, "", {}, sort_fields, {0}, 10, 1,
+                            token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address").get();
+
+    ASSERT_EQ(1, results["hits"].size());
+
+    highlight_doc = R"({
+      "locations":[
+        {
+          "address":{
+            "street":"One Bowerman <mark>Drive</mark>"
+          }
+        },
+        {
+          "address":{
+            "street":"175 Commerce <mark>Drive</mark>"
+          }
+        }
+      ]
+    })"_json;
+
+    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
+    ASSERT_EQ(0, results["hits"][0]["highlights"].size());
+
+    // nested array of array, highlighting parent of searched nested field
+    results = coll1->search("shoes", {"locations.address.products"}, "", {}, sort_fields, {0}, 10, 1,
+                            token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address",
+                            20, {}, {}, {}, 0, "<mark>", "</mark>", {}, 1000, true, false, true,
+                            "locations.address").get();
+
+    ASSERT_EQ(1, results["hits"].size());
+    highlight_full_doc = R"({
+      "locations":[
+        {
+          "address":{
+            "city":"Beaverton",
+            "products":[
+              "<mark>shoes</mark>",
+              "tshirts"
+            ],
+            "street":"One Bowerman Drive"
+          }
+        },
+        {
+          "address":{
+            "city":"Thornhill",
+            "products":[
+              "sneakers",
+              "<mark>shoes</mark>"
+            ],
+            "street":"175 Commerce Drive"
+          }
+        }
+      ]
+    })"_json;
+
+    ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["full"].dump());
+    ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
+
+    // full highlighting only one of the 3 highlight fields
+    results = coll1->search("drive", {"company.names", "company_names", "locations.address"}, "", {}, sort_fields, {0}, 10, 1,
+                            token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address",
+                            20, {}, {}, {}, 0, "<mark>", "</mark>", {}, 1000, true, false, true,
+                            "company.names,company_names,locations.address").get();
+
+    highlight_full_doc = R"({
+        "locations":[
+          {
+            "address":{
+              "city":"Beaverton",
+              "products":[
+                "shoes",
+                "tshirts"
+              ],
+              "street":"One Bowerman <mark>Drive</mark>"
+            }
+          },
+          {
+            "address":{
+              "city":"Thornhill",
+              "products":[
+                "sneakers",
+                "shoes"
+              ],
+              "street":"175 Commerce <mark>Drive</mark>"
+            }
+          }
+        ]
+    })"_json;
+
+    highlight_doc = R"({
+        "company":{
+          "names": ["Space Corp. LLC", "<mark>Drive</mark> One Inc."]
+        },
+        "company_names": ["Space Corp. LLC", "<mark>Drive</mark> One Inc."],
+        "locations":[
+          {
+            "address":{
+              "city":"Beaverton",
+              "products":[
+                "shoes",
+                "tshirts"
+              ],
+              "street":"One Bowerman <mark>Drive</mark>"
+            }
+          },
+          {
+            "address":{
+              "city":"Thornhill",
+              "products":[
+                "sneakers",
+                "shoes"
+              ],
+              "street":"175 Commerce <mark>Drive</mark>"
+            }
+          }
+        ]
+    })"_json;
+
+    ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["full"].dump());
+    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
+
+    // if highlight fields not provided, only matching sub-fields should appear in highlight
+
+    results = coll1->search("space", {"company.names", "company_names", "locations.address"}, "", {}, sort_fields, {0}, 10, 1,
+                            token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4).get();
+
+    highlight_doc = R"({
+        "company":{
+          "names": ["<mark>Space</mark> Corp. LLC", "Drive One Inc."]
+        },
+        "company_names": ["<mark>Space</mark> Corp. LLC", "Drive One Inc."]
+    })"_json;
+
+    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
+    ASSERT_EQ(0, results["hits"][0]["highlight"]["full"].size());
+
+    // only a single highlight full field provided
+
+    results = coll1->search("space", {"company.names", "company_names", "locations.address"}, "", {}, sort_fields, {0}, 10, 1,
+                            token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "company.names").get();
+
+    highlight_full_doc = R"({
+      "company":{
+        "names":[
+          "<mark>Space</mark> Corp. LLC",
+          "Drive One Inc."
+        ]
+      }
+    })"_json;
+
+    highlight_doc = R"({
+      "company":{
+        "names":[
+          "<mark>Space</mark> Corp. LLC",
+          "Drive One Inc."
+        ]
+      },
+      "company_names":[
+        "<mark>Space</mark> Corp. LLC",
+        "Drive One Inc."
+      ]
+    })"_json;
+
+    ASSERT_EQ(highlight_doc.dump(), results["hits"][0]["highlight"]["snippet"].dump());
+    ASSERT_EQ(highlight_full_doc.dump(), results["hits"][0]["highlight"]["full"].dump());
+}
+
+TEST_F(CollectionNestedFieldsTest, HighlightShouldHaveMeta) {
+    std::vector<field> fields = {field(".*", field_types::AUTO, false, true)};
+
+    auto op = collectionManager.create_collection("coll1", 1, fields, "", 0, field_types::AUTO);
+    ASSERT_TRUE(op.ok());
+    Collection* coll1 = op.get();
+
+    auto doc = R"({
+        "company_names": ["Quick brown fox jumped.", "The red fox was not fast."],
+        "details": {
+            "description": "Quick set, go.",
+            "names": ["Quick brown fox jumped.", "The red fox was not fast."]
+        },
+        "locations": [
+            {
+              "address": { "street": "Brown Shade Avenue" }
+            },
+            {
+                "address": { "street": "Graywolf Lane" }
+            }
+        ]
+    })"_json;
+
+    auto add_op = coll1->add(doc.dump(), CREATE);
+    ASSERT_TRUE(add_op.ok());
+
+    // search both simply nested and deeply nested array-of-objects
+    auto results = coll1->search("brown fox", {"company_names", "details", "locations"},
+                                 "", {}, sort_fields, {0}, 10, 1,
+                                 token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address").get();
+
+    ASSERT_EQ(3, results["hits"][0]["highlight"]["meta"].size());
+    ASSERT_EQ(1, results["hits"][0]["highlight"]["meta"]["company_names"].size());
+
+    ASSERT_EQ(2, results["hits"][0]["highlight"]["meta"]["company_names"]["matched_tokens"].size());
+    ASSERT_EQ("brown", results["hits"][0]["highlight"]["meta"]["company_names"]["matched_tokens"][0]);
+    ASSERT_EQ("fox", results["hits"][0]["highlight"]["meta"]["company_names"]["matched_tokens"][1]);
+
+    ASSERT_EQ(2, results["hits"][0]["highlight"]["meta"]["details.names"]["matched_tokens"].size());
+    ASSERT_EQ("brown", results["hits"][0]["highlight"]["meta"]["details.names"]["matched_tokens"][0]);
+    ASSERT_EQ("fox", results["hits"][0]["highlight"]["meta"]["details.names"]["matched_tokens"][1]);
+
+    ASSERT_EQ(1, results["hits"][0]["highlight"]["meta"]["locations.address.street"]["matched_tokens"].size());
+    ASSERT_EQ("Brown", results["hits"][0]["highlight"]["meta"]["locations.address.street"]["matched_tokens"][0]);
+}
+
 TEST_F(CollectionNestedFieldsTest, GroupByOnNestedFieldsWithWildcardSchema) {
    std::vector<field> fields = {field(".*", field_types::AUTO, false, true),
                                 field("education.name", field_types::STRING_ARRAY, true, true),