From 38d44a7c8ae9f51e96ba7b06d1bbb8c472f77668 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Mon, 26 Jul 2021 13:18:42 +0530
Subject: [PATCH 1/6] Highlight field value that is a prefix of the query.

---
 include/index.h                   |  3 ++-
 src/collection.cpp                | 16 ++++++-----
 src/index.cpp                     |  9 ++++---
 test/collection_specific_test.cpp | 45 ++++++++++++++++++++++++++++---
 test/collection_test.cpp          |  6 ++++-
 5 files changed, 64 insertions(+), 15 deletions(-)
diff --git a/include/index.h b/include/index.h
index 37bb45e0..b10bea9d 100644
--- a/include/index.h
+++ b/include/index.h
@@ -396,7 +396,8 @@ public:
 
     static void transform_for_180th_meridian(GeoCoord& point, double offset);
 
-    art_leaf* get_token_leaf(const std::string & field_name, const unsigned char* token, uint32_t token_len);
+    void get_token_leaves(const std::string & field_name, const unsigned char* token, uint32_t token_len,
+                          std::vector<art_leaf*>& leaves);
 
     // the following methods are not synchronized because their parent calls are synchronized
 
diff --git a/src/collection.cpp b/src/collection.cpp
index 0624cb44..d754d66c 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1501,11 +1501,12 @@ void Collection::highlight_result(const field &search_field,
         // Must search for the token string fresh on that field for the given document since `token_leaf`
         // is from the best matched field and need not be present in other fields of a document.
         Index* index = indices[field_order_kv->key % num_memory_shards];
-        art_leaf *actual_leaf = index->get_token_leaf(search_field.name, &token_leaf->key[0], token_leaf->key_len);
 
-        //LOG(INFO) << "field: " << search_field.name << ", key: " << token_leaf->key;
+        std::vector<art_leaf*> leaves;
+        index->get_token_leaves(search_field.name, &token_leaf->key[0], token_leaf->key_len, leaves);
 
-        if(actual_leaf != nullptr) {
+        for(const auto actual_leaf: leaves) {
+            //LOG(INFO) << "field: " << search_field.name << ", key: " << token_leaf->key;
             query_suggestion.push_back(actual_leaf);
             std::string token(reinterpret_cast<char*>(actual_leaf->key), actual_leaf->key_len-1);
             query_suggestion_tokens.insert(token);
@@ -1525,10 +1526,11 @@ void Collection::highlight_result(const field &search_field,
             }
 
             Index* index = indices[field_order_kv->key % num_memory_shards];
-            art_leaf *actual_leaf = index->get_token_leaf(search_field.name,
-                                                          reinterpret_cast<const unsigned char *>(q_token.c_str()),
-                                                          q_token.size() + 1);
-            if(actual_leaf != nullptr) {
+            std::vector<art_leaf*> leaves;
+            index->get_token_leaves(search_field.name, reinterpret_cast<const unsigned char*>(q_token.c_str()),
+                                    q_token.size() + 1, leaves);
+
+            for(const auto actual_leaf: leaves) {
                 std::vector<uint16_t> positions;
                 uint32_t doc_index = actual_leaf->values->ids.indexOf(field_order_kv->key);
                 if(doc_index != actual_leaf->values->ids.getLength()) {
diff --git a/src/index.cpp b/src/index.cpp
index 6f6f4b7d..1b8227d2 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -2639,10 +2639,13 @@ void Index::tokenize_string_field(const nlohmann::json& document, const field& s
     }
 }
 
-art_leaf* Index::get_token_leaf(const std::string & field_name, const unsigned char* token, uint32_t token_len) {
+void Index::get_token_leaves(const std::string & field_name, const unsigned char* token, uint32_t token_len,
+                             std::vector<art_leaf*>& leaves) {
     std::shared_lock lock(mutex);
-    const art_tree *t = search_index.at(field_name);
-    return (art_leaf*) art_search(t, token, (int) token_len);
+    art_tree *t = search_index.at(field_name);
+    token_len = (token_len == 0) ? 0 : token_len-1;
+    art_fuzzy_search(t, token, token_len, 0, 0, 2, token_ordering::MAX_SCORE,
+                     true, nullptr, 0, leaves);
 }
 
 const spp::sparse_hash_map<std::string, art_tree *> &Index::_get_search_index() const {
diff --git a/test/collection_specific_test.cpp b/test/collection_specific_test.cpp
index 786211f1..d316b230 100644
--- a/test/collection_specific_test.cpp
+++ b/test/collection_specific_test.cpp
@@ -606,10 +606,49 @@ TEST_F(CollectionSpecificTest, TokensSpreadAcrossFields) {
                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
                                  "<mark>", "</mark>", {4, 1}).get();
 
-    LOG(INFO) << results;
-
     ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
     ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
 
     collectionManager.drop_collection("coll1");
-}
\ No newline at end of file
+}
+
+TEST_F(CollectionSpecificTest, HighlightSecondaryFieldWithPrefixMatch) {
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("description", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
+
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["title"] = "Functions and Equations";
+    doc1["description"] = "Use a function to solve an equation.";
+    doc1["points"] = 100;
+
+    nlohmann::json doc2;
+    doc2["id"] = "1";
+    doc2["title"] = "Function of effort";
+    doc2["description"] = "Learn all about it.";
+    doc2["points"] = 100;
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
+
+    auto results = coll1->search("function", {"title", "description"}, "", {}, {}, {0}, 10,
+                                 1, FREQUENCY, {true, true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {1, 1}).get();
+
+    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
+
+    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
+
+    ASSERT_EQ("<mark>Functions</mark> and Equations",
+              results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    ASSERT_EQ("Use a <mark>function</mark> to solve an equation.",
+              results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
+
+    collectionManager.drop_collection("coll1");
+}
diff --git a/test/collection_test.cpp b/test/collection_test.cpp
index 0e9221a2..d82982d0 100644
--- a/test/collection_test.cpp
+++ b/test/collection_test.cpp
@@ -3317,7 +3317,7 @@ TEST_F(CollectionTest, MultiFieldHighlighting) {
 
     ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
 
-    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
+    ASSERT_EQ(3, results["hits"][0]["highlights"].size());
     ASSERT_EQ("name", results["hits"][0]["highlights"][0]["field"].get<std::string>());
     ASSERT_EQ("Best Wireless Vehicle <mark>Charger</mark>",
               results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
@@ -3326,6 +3326,10 @@ TEST_F(CollectionTest, MultiFieldHighlighting) {
     ASSERT_EQ("Easily replenish your cell phone with this wireless <mark>charger.</mark>",
               results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
 
+    ASSERT_EQ("categories", results["hits"][0]["highlights"][2]["field"].get<std::string>());
+    ASSERT_EQ("Car <mark>Chargers</mark>",
+              results["hits"][0]["highlights"][2]["snippets"][0].get<std::string>());
+
     results = coll1->search("John With Denver",
                             {"description"}, "", {}, {}, {0}, 10, 1, FREQUENCY,
                             {true}, 1, spp::sparse_hash_set<std::string>(),

From e45f18785f143b3933bd529ba2cce77977cf9c07 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Mon, 26 Jul 2021 19:44:10 +0530
Subject: [PATCH 2/6] Ignore id field present in schema.

---
 include/field.h                   | 12 ++++++++++++
 test/collection_specific_test.cpp | 24 ++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/include/field.h b/include/field.h
index 8d9cbf83..e7200da8 100644
--- a/include/field.h
+++ b/include/field.h
@@ -5,6 +5,7 @@
 #include "art.h"
 #include "option.h"
 #include "string_utils.h"
+#include "logger.h"
 #include "json.hpp"
 
 namespace field_types {
@@ -192,6 +193,10 @@ struct field {
         bool found_default_sorting_field = false;
 
         for(const field & field: fields) {
+            if(field.name == "id") {
+                continue;
+            }
+
             nlohmann::json field_val;
             field_val[fields::name] = field.name;
             field_val[fields::type] = field.type;
@@ -263,6 +268,13 @@ struct field {
         size_t num_auto_detect_fields = 0;
 
         for(nlohmann::json & field_json: fields_json) {
+            if(field_json.is_object() && field_json.count("name") != 0 && field_json["name"] == "id") {
+                // "id" is reserved for internal use: skip it with a warning instead of erroring, for backward
+                // compatibility. Entries that are not objects fall through to the validation below.
+                LOG(WARNING) << "Collection schema cannot contain a field with name `id`. Ignoring field.";
+                continue;
+            }
+
             if(!field_json.is_object() ||
                field_json.count(fields::name) == 0 || field_json.count(fields::type) == 0 ||
                !field_json.at(fields::name).is_string() || !field_json.at(fields::type).is_string()) {
diff --git a/test/collection_specific_test.cpp b/test/collection_specific_test.cpp
index d316b230..f36be260 100644
--- a/test/collection_specific_test.cpp
+++ b/test/collection_specific_test.cpp
@@ -652,3 +652,27 @@ TEST_F(CollectionSpecificTest, HighlightSecondaryFieldWithPrefixMatch) {
 
     collectionManager.drop_collection("coll1");
 }
+
+TEST_F(CollectionSpecificTest, GuardAgainstIdFieldInSchema) {
+    // The "id" field, if defined in the schema should be ignored
+
+    std::vector<field> fields = {field("title", field_types::STRING, false),  // NOTE(review): `fields` is not referenced below
+                                 field("id", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    nlohmann::json schema;  // JSON schema with fields title, id, points
+    schema["name"] = "books";
+    schema["fields"] = nlohmann::json::array();
+    schema["fields"][0]["name"] = "title";
+    schema["fields"][0]["type"] = "string";
+    schema["fields"][1]["name"] = "id";
+    schema["fields"][1]["type"] = "string";
+    schema["fields"][2]["name"] = "points";
+    schema["fields"][2]["type"] = "int32";
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();  // NOTE(review): schema name is "books", yet "coll1" is dropped below - confirm
+
+    ASSERT_EQ(0, coll1->get_schema().count("id"));  // "id" must be absent from the resulting schema
+
+    collectionManager.drop_collection("coll1");
+}

From b4c222064c6bf96f5af6d727d732eea748c4e5b2 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Mon, 26 Jul 2021 19:44:38 +0530
Subject: [PATCH 3/6] Handle bad data in ingestion text gracefully.

---
 src/collection.cpp                |  3 ++-
 test/collection_specific_test.cpp | 13 +++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/collection.cpp b/src/collection.cpp
index d754d66c..aaf0d11f 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -337,7 +337,8 @@ void Collection::batch_index(std::vector<std::vector<index_record>> &index_batch
                 res["code"] = index_record.indexed.code();
             }
 
-            json_out[index_record.position] = res.dump();
+            json_out[index_record.position] = res.dump(-1, ' ', false,
+                                                       nlohmann::detail::error_handler_t::ignore);
         }
     }
 }
diff --git a/test/collection_specific_test.cpp b/test/collection_specific_test.cpp
index f36be260..4fc657e7 100644
--- a/test/collection_specific_test.cpp
+++ b/test/collection_specific_test.cpp
@@ -676,3 +676,16 @@ TEST_F(CollectionSpecificTest, GuardAgainstIdFieldInSchema) {
 
     collectionManager.drop_collection("coll1");
 }
+
+TEST_F(CollectionSpecificTest, HandleBadCharactersInStringGracefully) {
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
+    std::string doc_str = "不推荐。\",\"price\":10.12,\"ratings\":5}";  // tail of a JSON object; the opening part is missing
+
+    auto add_op = coll1->add(doc_str);
+    ASSERT_FALSE(add_op.ok());  // the add must report failure
+
+    collectionManager.drop_collection("coll1");
+}

From 13cb7b936493f694a296e62e0860dd17e72bc7bc Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Tue, 27 Jul 2021 17:57:49 +0530
Subject: [PATCH 4/6] Revert "Highlight field value that is a prefix of the
 query."

This reverts commit 545027a59bc55b24c2fece112b4fa6a655a1f79e.

# Conflicts:
#	test/collection_specific_test.cpp
---
 include/index.h                   |  3 +--
 src/collection.cpp                | 16 ++++++------
 src/index.cpp                     |  9 +++----
 test/collection_specific_test.cpp | 41 -------------------------------
 test/collection_test.cpp          |  6 +----
 5 files changed, 12 insertions(+), 63 deletions(-)

diff --git a/include/index.h b/include/index.h
index b10bea9d..37bb45e0 100644
--- a/include/index.h
+++ b/include/index.h
@@ -396,8 +396,7 @@ public:
 
     static void transform_for_180th_meridian(GeoCoord& point, double offset);
 
-    void get_token_leaves(const std::string & field_name, const unsigned char* token, uint32_t token_len,
-                          std::vector<art_leaf*>& leaves);
+    art_leaf* get_token_leaf(const std::string & field_name, const unsigned char* token, uint32_t token_len);
 
     // the following methods are not synchronized because their parent calls are synchronized
 
diff --git a/src/collection.cpp b/src/collection.cpp
index aaf0d11f..3373ab30 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1502,12 +1502,11 @@ void Collection::highlight_result(const field &search_field,
         // Must search for the token string fresh on that field for the given document since `token_leaf`
         // is from the best matched field and need not be present in other fields of a document.
         Index* index = indices[field_order_kv->key % num_memory_shards];
+        art_leaf *actual_leaf = index->get_token_leaf(search_field.name, &token_leaf->key[0], token_leaf->key_len);
 
-        std::vector<art_leaf*> leaves;
-        index->get_token_leaves(search_field.name, &token_leaf->key[0], token_leaf->key_len, leaves);
+        //LOG(INFO) << "field: " << search_field.name << ", key: " << token_leaf->key;
 
-        for(const auto actual_leaf: leaves) {
-            //LOG(INFO) << "field: " << search_field.name << ", key: " << token_leaf->key;
+        if(actual_leaf != nullptr) {
             query_suggestion.push_back(actual_leaf);
             std::string token(reinterpret_cast<char*>(actual_leaf->key), actual_leaf->key_len-1);
             query_suggestion_tokens.insert(token);
@@ -1527,11 +1526,10 @@ void Collection::highlight_result(const field &search_field,
             }
 
             Index* index = indices[field_order_kv->key % num_memory_shards];
-            std::vector<art_leaf*> leaves;
-            index->get_token_leaves(search_field.name, reinterpret_cast<const unsigned char*>(q_token.c_str()),
-                                    q_token.size() + 1, leaves);
-
-            for(const auto actual_leaf: leaves) {
+            art_leaf *actual_leaf = index->get_token_leaf(search_field.name,
+                                                          reinterpret_cast<const unsigned char *>(q_token.c_str()),
+                                                          q_token.size() + 1);
+            if(actual_leaf != nullptr) {
                 std::vector<uint16_t> positions;
                 uint32_t doc_index = actual_leaf->values->ids.indexOf(field_order_kv->key);
                 if(doc_index != actual_leaf->values->ids.getLength()) {
diff --git a/src/index.cpp b/src/index.cpp
index 1b8227d2..6f6f4b7d 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -2639,13 +2639,10 @@ void Index::tokenize_string_field(const nlohmann::json& document, const field& s
     }
 }
 
-void Index::get_token_leaves(const std::string & field_name, const unsigned char* token, uint32_t token_len,
-                             std::vector<art_leaf*>& leaves) {
+art_leaf* Index::get_token_leaf(const std::string & field_name, const unsigned char* token, uint32_t token_len) {
     std::shared_lock lock(mutex);
-    art_tree *t = search_index.at(field_name);
-    token_len = (token_len == 0) ? 0 : token_len-1;
-    art_fuzzy_search(t, token, token_len, 0, 0, 2, token_ordering::MAX_SCORE,
-                     true, nullptr, 0, leaves);
+    const art_tree *t = search_index.at(field_name);
+    return (art_leaf*) art_search(t, token, (int) token_len);
 }
 
 const spp::sparse_hash_map<std::string, art_tree *> &Index::_get_search_index() const {
diff --git a/test/collection_specific_test.cpp b/test/collection_specific_test.cpp
index 4fc657e7..c1855479 100644
--- a/test/collection_specific_test.cpp
+++ b/test/collection_specific_test.cpp
@@ -612,47 +612,6 @@ TEST_F(CollectionSpecificTest, TokensSpreadAcrossFields) {
     collectionManager.drop_collection("coll1");
 }
 
-TEST_F(CollectionSpecificTest, HighlightSecondaryFieldWithPrefixMatch) {
-    std::vector<field> fields = {field("title", field_types::STRING, false),
-                                 field("description", field_types::STRING, false),
-                                 field("points", field_types::INT32, false),};
-
-    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
-
-    nlohmann::json doc1;
-    doc1["id"] = "0";
-    doc1["title"] = "Functions and Equations";
-    doc1["description"] = "Use a function to solve an equation.";
-    doc1["points"] = 100;
-
-    nlohmann::json doc2;
-    doc2["id"] = "1";
-    doc2["title"] = "Function of effort";
-    doc2["description"] = "Learn all about it.";
-    doc2["points"] = 100;
-
-    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
-
-    auto results = coll1->search("function", {"title", "description"}, "", {}, {}, {0}, 10,
-                                 1, FREQUENCY, {true, true},
-                                 10, spp::sparse_hash_set<std::string>(),
-                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
-                                 "<mark>", "</mark>", {1, 1}).get();
-
-    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
-
-    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
-
-    ASSERT_EQ("<mark>Functions</mark> and Equations",
-              results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
-
-    ASSERT_EQ("Use a <mark>function</mark> to solve an equation.",
-              results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
-
-    collectionManager.drop_collection("coll1");
-}
-
 TEST_F(CollectionSpecificTest, GuardAgainstIdFieldInSchema) {
     // The "id" field, if defined in the schema should be ignored
 
diff --git a/test/collection_test.cpp b/test/collection_test.cpp
index d82982d0..0e9221a2 100644
--- a/test/collection_test.cpp
+++ b/test/collection_test.cpp
@@ -3317,7 +3317,7 @@ TEST_F(CollectionTest, MultiFieldHighlighting) {
 
     ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
 
-    ASSERT_EQ(3, results["hits"][0]["highlights"].size());
+    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
     ASSERT_EQ("name", results["hits"][0]["highlights"][0]["field"].get<std::string>());
     ASSERT_EQ("Best Wireless Vehicle <mark>Charger</mark>",
               results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
@@ -3326,10 +3326,6 @@ TEST_F(CollectionTest, MultiFieldHighlighting) {
     ASSERT_EQ("Easily replenish your cell phone with this wireless <mark>charger.</mark>",
               results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
 
-    ASSERT_EQ("categories", results["hits"][0]["highlights"][2]["field"].get<std::string>());
-    ASSERT_EQ("Car <mark>Chargers</mark>",
-              results["hits"][0]["highlights"][2]["snippets"][0].get<std::string>());
-
     results = coll1->search("John With Denver",
                             {"description"}, "", {}, {}, {0}, 10, 1, FREQUENCY,
                             {true}, 1, spp::sparse_hash_set<std::string>(),

From 331db4f27e7917dd025e3f89218f3d824b374788 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Tue, 27 Jul 2021 19:57:56 +0530
Subject: [PATCH 5/6] Add precision option to geo field sorting.

---
 include/field.h                  | 10 +++-
 src/collection.cpp               | 70 ++++++++++++++---------
 src/index.cpp                    |  5 ++
 test/collection_sorting_test.cpp | 98 +++++++++++++++++++++++++++++++-
 4 files changed, 152 insertions(+), 31 deletions(-)

diff --git a/include/field.h b/include/field.h
index e7200da8..03d82ec8 100644
--- a/include/field.h
+++ b/include/field.h
@@ -475,6 +475,7 @@ namespace sort_field_const {
     static const std::string seq_id = "_seq_id";
 
     static const std::string exclude_radius = "exclude_radius";
+    static const std::string precision = "precision";
 }
 
 struct sort_by {
@@ -484,14 +485,16 @@ struct sort_by {
     // geo related fields
     int64_t geopoint;
     uint32_t exclude_radius;
+    uint32_t geo_precision;
 
     sort_by(const std::string & name, const std::string & order):
-        name(name), order(order), geopoint(0), exclude_radius(0) {
+        name(name), order(order), geopoint(0), exclude_radius(0), geo_precision(0) {
 
     }
 
-    sort_by(const std::string &name, const std::string &order, int64_t geopoint, uint32_t exclude_radius) :
-            name(name), order(order), geopoint(geopoint), exclude_radius(exclude_radius) {
+    sort_by(const std::string &name, const std::string &order, int64_t geopoint,
+            uint32_t exclude_radius, uint32_t geo_precision) :
+            name(name), order(order), geopoint(geopoint), exclude_radius(exclude_radius), geo_precision(geo_precision) {
 
     }
 
@@ -500,6 +503,7 @@ struct sort_by {
         order = other.order;
         geopoint = other.geopoint;
         exclude_radius = other.exclude_radius;
+        geo_precision = other.geo_precision;
         return *this;
     }
 };
diff --git a/src/collection.cpp b/src/collection.cpp
index 3373ab30..44590a26 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -753,36 +753,54 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
 
             if(geo_parts.size() == 3) {
                 // try to parse the exclude radius option
-                if(!StringUtils::begins_with(geo_parts[2], sort_field_const::exclude_radius)) {
-                    return Option<nlohmann::json>(400, error);
-                }
+                bool is_exclude_option = false;
 
-                std::vector<std::string> exclude_parts;
-                StringUtils::split(geo_parts[2], exclude_parts, ":");
-
-                if(exclude_parts.size() != 2) {
-                    return Option<nlohmann::json>(400, error);
-                }
-
-                std::vector<std::string> exclude_value_parts;
-                StringUtils::split(exclude_parts[1], exclude_value_parts, " ");
-
-                if(exclude_value_parts.size() != 2) {
-                    return Option<nlohmann::json>(400, error);
-                }
-
-                if(!StringUtils::is_float(exclude_value_parts[0])) {
-                    return Option<nlohmann::json>(400, error);
-                }
-
-                if(exclude_value_parts[1] == "km") {
-                    sort_field_std.exclude_radius = std::stof(exclude_value_parts[0]) * 1000;
-                } else if(exclude_value_parts[1] == "mi") {
-                    sort_field_std.exclude_radius = std::stof(exclude_value_parts[0]) * 1609.34;
+                if(StringUtils::begins_with(geo_parts[2], sort_field_const::exclude_radius)) {
+                    is_exclude_option = true;
+                } else if(StringUtils::begins_with(geo_parts[2], sort_field_const::precision)) {
+                    is_exclude_option = false;
                 } else {
-                    return Option<nlohmann::json>(400, "Sort field's exclude radius "
+                    return Option<nlohmann::json>(400, error);
+                }
+
+                std::vector<std::string> param_parts;
+                StringUtils::split(geo_parts[2], param_parts, ":");
+
+                if(param_parts.size() != 2) {
+                    return Option<nlohmann::json>(400, error);
+                }
+
+                std::vector<std::string> param_value_parts;
+                StringUtils::split(param_parts[1], param_value_parts, " ");
+
+                if(param_value_parts.size() != 2) {
+                    return Option<nlohmann::json>(400, error);
+                }
+
+                if(!StringUtils::is_float(param_value_parts[0])) {
+                    return Option<nlohmann::json>(400, error);
+                }
+
+                int32_t value_meters;
+
+                if(param_value_parts[1] == "km") {
+                    value_meters = std::stof(param_value_parts[0]) * 1000;
+                } else if(param_value_parts[1] == "mi") {
+                    value_meters = std::stof(param_value_parts[0]) * 1609.34;
+                } else {
+                    return Option<nlohmann::json>(400, "Sort field's parameter "
                                                        "unit must be either `km` or `mi`.");
                 }
+
+                if(value_meters <= 0) {
+                    return Option<nlohmann::json>(400, "Sort field's parameter must be a positive number.");
+                }
+
+                if(is_exclude_option) {
+                    sort_field_std.exclude_radius = value_meters;
+                } else {
+                    sort_field_std.geo_precision = value_meters;
+                }
             }
 
             double lat = std::stod(geo_parts[0]);
diff --git a/src/index.cpp b/src/index.cpp
index 6f6f4b7d..a36a7adc 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -2145,6 +2145,11 @@ void Index::score_results(const std::vector<sort_by> & sort_fields, const uint16
                     dist = 0;
                 }
 
+                if(sort_fields[i].geo_precision > 0) {
+                    dist = dist + sort_fields[i].geo_precision - 1 -
+                           (dist + sort_fields[i].geo_precision - 1) % sort_fields[i].geo_precision;
+                }
+
                 geopoint_distances[i].emplace(seq_id, dist);
             }
 
diff --git a/test/collection_sorting_test.cpp b/test/collection_sorting_test.cpp
index d85686a5..5c2283b0 100644
--- a/test/collection_sorting_test.cpp
+++ b/test/collection_sorting_test.cpp
@@ -756,7 +756,101 @@ TEST_F(CollectionSortingTest, GeoPointSortingWithExcludeRadius) {
                            {}, geo_sort_fields, {0}, 10, 1, FREQUENCY);
 
     ASSERT_FALSE(res_op.ok());
-    ASSERT_EQ("Sort field's exclude radius unit must be either `km` or `mi`.", res_op.error());
+    ASSERT_EQ("Sort field's parameter unit must be either `km` or `mi`.", res_op.error());
+
+    geo_sort_fields = { sort_by("loc(32.24348, 77.1893, exclude_radius: -10 km)", "ASC") };
+    res_op = coll1->search("*", {}, "loc: (32.24348, 77.1893, 20 km)",
+                           {}, geo_sort_fields, {0}, 10, 1, FREQUENCY);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("Sort field's parameter must be a positive number.", res_op.error());
 
     collectionManager.drop_collection("coll1");
-}
\ No newline at end of file
+}
+
+TEST_F(CollectionSortingTest, GeoPointSortingWithPrecision) {  // geo sort with a `precision` parameter, plus its error cases
+    Collection* coll1;
+
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("loc", field_types::GEOPOINT, false),
+                                 field("points", field_types::INT32, false),};
+
+    coll1 = collectionManager.get_collection("coll1").get();  // reuse "coll1" if it already exists
+    if (coll1 == nullptr) {
+        coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
+    }
+
+    std::vector<std::vector<std::string>> records = {  // each record: {title, "lat, lng"}
+        {"Tibetan Colony",     "32.24678, 77.19239"},
+        {"Civil Hospital",     "32.23959, 77.18763"},
+        {"Johnson Lodge",      "32.24751, 77.18814"},
+
+        {"Lion King Rock",     "32.24493, 77.17038"},
+        {"Jai Durga Handloom", "32.25749, 77.17583"},
+        {"Panduropa",          "32.26059, 77.21798"},
+
+        {"Police Station",     "32.23743, 77.18639"},
+        {"Panduropa Post",     "32.26263, 77.2196"},
+    };
+
+    for (size_t i = 0; i < records.size(); i++) {  // one document per record; id and points are the record index
+        nlohmann::json doc;
+
+        std::vector<std::string> lat_lng;
+        StringUtils::split(records[i][1], lat_lng, ", ");  // split the "lat, lng" string into its two parts
+
+        double lat = std::stod(lat_lng[0]);
+        double lng = std::stod(lat_lng[1]);
+
+        doc["id"] = std::to_string(i);
+        doc["title"] = records[i][0];
+        doc["loc"] = {lat, lng};
+        doc["points"] = i;
+
+        ASSERT_TRUE(coll1->add(doc.dump()).ok());
+    }
+
+    std::vector<sort_by> geo_sort_fields = {  // primary: loc ASC with precision 0.9 km; secondary: points DESC
+        sort_by("loc(32.24348, 77.1893, precision: 0.9 km)", "ASC"),
+        sort_by("points", "DESC"),
+    };
+
+    auto results = coll1->search("*",
+                                 {}, "loc: (32.24348, 77.1893, 20 km)",
+                                 {}, geo_sort_fields, {0}, 10, 1, FREQUENCY).get();
+
+    ASSERT_EQ(8, results["found"].get<size_t>());  // all eight records are expected to match the filter
+
+    std::vector<std::string> expected_ids = {  // expected hit order under the two sort fields above
+        "6", "2", "1", "0", "3", "4", "7", "5"
+    };
+
+    for (size_t i = 0; i < expected_ids.size(); i++) {
+        ASSERT_STREQ(expected_ids[i].c_str(), results["hits"][i]["document"]["id"].get<std::string>().c_str());
+    }
+
+    // badly formatted precision
+
+    geo_sort_fields = { sort_by("loc(32.24348, 77.1893, precision 1 km)", "ASC") };  // no ':' after `precision`
+    auto res_op = coll1->search("*", {}, "loc: (32.24348, 77.1893, 20 km)",
+                                {}, geo_sort_fields, {0}, 10, 1, FREQUENCY);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("Bad syntax for geopoint sorting field `loc`", res_op.error());
+
+    geo_sort_fields = { sort_by("loc(32.24348, 77.1893, precision: 1 meter)", "ASC") };  // unit is neither km nor mi
+    res_op = coll1->search("*", {}, "loc: (32.24348, 77.1893, 20 km)",
+                           {}, geo_sort_fields, {0}, 10, 1, FREQUENCY);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("Sort field's parameter unit must be either `km` or `mi`.", res_op.error());
+
+    geo_sort_fields = { sort_by("loc(32.24348, 77.1893, precision: -10 km)", "ASC") };  // negative value
+    res_op = coll1->search("*", {}, "loc: (32.24348, 77.1893, 20 km)",
+                           {}, geo_sort_fields, {0}, 10, 1, FREQUENCY);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("Sort field's parameter must be a positive number.", res_op.error());
+
+    collectionManager.drop_collection("coll1");
+}

From b2c12a9b2cef557e57a51aabe708a154ffff91cf Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Sat, 31 Jul 2021 08:59:49 +0530
Subject: [PATCH 6/6] Fix more edge cases in highlighting.

---
 include/topster.h                 | 66 +++++++++++++++++++++++++++
 src/collection.cpp                | 76 +++++++++++++++++++++++--------
 src/index.cpp                     | 16 +++++--
 test/collection_locale_test.cpp   |  2 +-
 test/collection_manager_test.cpp  |  1 -
 test/collection_specific_test.cpp | 72 +++++++++++++++++++++++++++++
 test/collection_test.cpp          | 67 +++++++++++++++------------
 7 files changed, 247 insertions(+), 53 deletions(-)

diff --git a/include/topster.h b/include/topster.h
index 5550fbd0..19ba746c 100644
--- a/include/topster.h
+++ b/include/topster.h
@@ -16,6 +16,9 @@ struct KV {
     uint64_t distinct_key{};
     int64_t scores[3]{};  // match score + 2 custom attributes
 
+    // to be used only in final aggregation; freed with delete[] in ~KV()
+    uint64_t* query_indices = nullptr;  // layout: [0] = count N, [1..N] = the N query indices
+
     KV(uint8_t field_id, uint16_t queryIndex, uint32_t token_bits, uint64_t key, uint64_t distinct_key,
        uint8_t match_score_index, const int64_t *scores):
             field_id(field_id), match_score_index(match_score_index),
@@ -27,6 +30,69 @@ struct KV {
     }
 
     KV() = default;
+
+    KV(KV& kv) = default;  // NOTE(review): defaulted copy duplicates the raw query_indices pointer, yet ~KV() delete[]s it -- confirm no copy is made once it is set
+
+    KV(KV&& kv) noexcept : field_id(kv.field_id), match_score_index(kv.match_score_index),  // move ctor: scalar members are copied over
+                 query_index(kv.query_index), array_index(kv.array_index), token_bits(kv.token_bits),
+                 key(kv.key), distinct_key(kv.distinct_key) {
+
+        scores[0] = kv.scores[0];
+        scores[1] = kv.scores[1];
+        scores[2] = kv.scores[2];
+
+        query_indices = kv.query_indices;  // take over the source's array instead of copying it
+        kv.query_indices = nullptr;  // the source's destructor must not free the array we now hold
+    }
+
+    KV& operator=(KV&& kv) noexcept  {  // move assignment: copies scalars, transfers query_indices
+        if (this != &kv) {  // on self-assignment the delete[] below would free the very array being adopted
+            field_id = kv.field_id;
+            match_score_index = kv.match_score_index;
+            query_index = kv.query_index;
+            array_index = kv.array_index;
+            token_bits = kv.token_bits;
+            key = kv.key;
+            distinct_key = kv.distinct_key;
+
+            scores[0] = kv.scores[0];
+            scores[1] = kv.scores[1];
+            scores[2] = kv.scores[2];
+
+            delete[] query_indices;  // release the array this object held so far (no-op for nullptr)
+            query_indices = kv.query_indices;  // adopt the source's array
+            kv.query_indices = nullptr;  // the source gives it up, so it is freed only once
+        }
+
+        return *this;
+    }
+
+    KV& operator=(KV& kv) noexcept  {  // NOTE(review): lvalue assignment, but it transfers query_indices out of `kv` like the move overload -- confirm callers expect the source to be modified
+        if (this != &kv) {  // on self-assignment the delete[] below would free the very array being adopted
+            field_id = kv.field_id;
+            match_score_index = kv.match_score_index;
+            query_index = kv.query_index;
+            array_index = kv.array_index;
+            token_bits = kv.token_bits;
+            key = kv.key;
+            distinct_key = kv.distinct_key;
+
+            scores[0] = kv.scores[0];
+            scores[1] = kv.scores[1];
+            scores[2] = kv.scores[2];
+
+            delete[] query_indices;  // release the array this object held so far (no-op for nullptr)
+            query_indices = kv.query_indices;  // take the source's array rather than duplicating it
+            kv.query_indices = nullptr;  // source is left without an array, so it is freed only once
+        }
+
+        return *this;
+    }
+
+    ~KV() {  // frees the query_indices array held by this object
+        delete [] query_indices;  // no-op when query_indices is nullptr
+        query_indices = nullptr;
+    }
 };
 
 /*
diff --git a/src/collection.cpp b/src/collection.cpp
index 44590a26..640e744b 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1405,12 +1405,22 @@ void Collection::aggregate_topster(size_t query_index, Topster& agg_topster, Top
             Topster* group_topster = group_topster_entry.second;
             for(const auto& map_kv: group_topster->kv_map) {
                 map_kv.second->query_index += query_index;
+                if(map_kv.second->query_indices != nullptr) {
+                    for(size_t i = 0; i < map_kv.second->query_indices[0]; i++) {
+                        map_kv.second->query_indices[i+1] += query_index;
+                    }
+                }
                 agg_topster.add(map_kv.second);
             }
         }
     } else {
         for(const auto& map_kv: index_topster->kv_map) {
             map_kv.second->query_index += query_index;
+            if(map_kv.second->query_indices != nullptr) {
+                for(size_t i = 0; i < map_kv.second->query_indices[0]; i++) {
+                    map_kv.second->query_indices[i+1] += query_index;
+                }
+            }
             agg_topster.add(map_kv.second);
         }
     }
@@ -1516,25 +1526,38 @@ void Collection::highlight_result(const field &search_field,
     std::vector<art_leaf*> query_suggestion;
     std::set<std::string> query_suggestion_tokens;
 
-    for (const art_leaf *token_leaf : searched_queries[field_order_kv->query_index]) {
-        // Must search for the token string fresh on that field for the given document since `token_leaf`
-        // is from the best matched field and need not be present in other fields of a document.
-        Index* index = indices[field_order_kv->key % num_memory_shards];
-        art_leaf *actual_leaf = index->get_token_leaf(search_field.name, &token_leaf->key[0], token_leaf->key_len);
+    size_t qindex = 0;
 
-        //LOG(INFO) << "field: " << search_field.name << ", key: " << token_leaf->key;
+    do {
+        auto searched_query =
+                (field_order_kv->query_indices == nullptr) ? searched_queries[field_order_kv->query_index] :
+                searched_queries[field_order_kv->query_indices[qindex + 1]];
+
+        for (art_leaf* token_leaf : searched_query) {
+            // Must search for the token string fresh on that field for the given document since `token_leaf`
+            // is from the best matched field and need not be present in other fields of a document.
+            Index* index = indices[field_order_kv->key % num_memory_shards];
+            art_leaf* actual_leaf = index->get_token_leaf(search_field.name, &token_leaf->key[0], token_leaf->key_len);
+
+            if(actual_leaf == nullptr) {
+                continue;
+            }
 
-        if(actual_leaf != nullptr) {
-            query_suggestion.push_back(actual_leaf);
-            std::string token(reinterpret_cast<char*>(actual_leaf->key), actual_leaf->key_len-1);
-            query_suggestion_tokens.insert(token);
-            std::vector<uint16_t> positions;
             uint32_t doc_index = actual_leaf->values->ids.indexOf(field_order_kv->key);
-            auto doc_indices = new uint32_t[1];
-            doc_indices[0] = doc_index;
-            leaf_to_indices.push_back(doc_indices);
+            if(doc_index != actual_leaf->values->ids.getLength()) {
+                query_suggestion.push_back(actual_leaf);
+                std::string token(reinterpret_cast<char*>(actual_leaf->key), actual_leaf->key_len - 1);
+                //LOG(INFO) << "field: " << search_field.name << ", key: " << token;
+                query_suggestion_tokens.insert(token);
+                auto doc_indices = new uint32_t[1];
+                doc_indices[0] = doc_index;
+                leaf_to_indices.push_back(doc_indices);
+            }
         }
-    }
+
+        qindex++;
+    } while(field_order_kv->query_indices != nullptr && qindex < field_order_kv->query_indices[0]);
+
 
     if(query_suggestion.size() != q_tokens.size()) {
         // can happen for compound query matched across 2 fields when some tokens are dropped
@@ -1543,6 +1566,8 @@ void Collection::highlight_result(const field &search_field,
                 continue;
             }
 
+            query_suggestion_tokens.insert(q_token);
+
             Index* index = indices[field_order_kv->key % num_memory_shards];
             art_leaf *actual_leaf = index->get_token_leaf(search_field.name,
                                                           reinterpret_cast<const unsigned char *>(q_token.c_str()),
@@ -1638,9 +1663,10 @@ void Collection::highlight_result(const field &search_field,
 
         highlight.matched_tokens.emplace_back();
         std::vector<std::string>& matched_tokens = highlight.matched_tokens.back();
+        bool found_first_match = false;
 
         while(tokenizer.next(raw_token, raw_token_index, tok_start, tok_end)) {
-            if(token_offsets.empty()) {
+            if(!found_first_match) {
                 if(snippet_start_window.size() == highlight_affix_num_tokens + 1) {
                     snippet_start_window.pop_front();
                 }
@@ -1648,7 +1674,10 @@ void Collection::highlight_result(const field &search_field,
                 snippet_start_window.push_back(tok_start);
             }
 
-            if (token_hits.count(raw_token) != 0 ||
+            bool token_already_found = token_hits.count(raw_token) != 0;
+
+            // ensures that the `snippet_start_offset` is always from a matched token, and not from query suggestion
+            if ((found_first_match && token_already_found) ||
                 (match_offset_index < match.offsets.size() &&
                  match.offsets[match_offset_index].offset == raw_token_index)) {
 
@@ -1661,9 +1690,15 @@ void Collection::highlight_result(const field &search_field,
                 } while(match_offset_index < match.offsets.size() &&
                         match.offsets[match_offset_index - 1].offset == match.offsets[match_offset_index].offset);
 
-                if(token_offsets.size() == 1) {
+                if(!found_first_match) {
                     snippet_start_offset = snippet_start_window.front();
                 }
+
+                found_first_match = true;
+
+            } else if(query_suggestion_tokens.find(raw_token) != query_suggestion_tokens.end()) {
+                token_offsets.emplace(tok_start, tok_end);
+                token_hits.insert(raw_token);
             }
 
             if(raw_token_index == last_valid_offset + highlight_affix_num_tokens) {
@@ -1693,6 +1728,11 @@ void Collection::highlight_result(const field &search_field,
         auto offset_it = token_offsets.begin();
         std::stringstream highlighted_text;
 
+        // tokens from query might occur before actual snippet start offset: we skip that
+        while(offset_it != token_offsets.end() && offset_it->first < snippet_start_offset) {
+            offset_it++;
+        }
+
         for(size_t i = snippet_start_offset; i <= snippet_end_offset; i++) {
             if(offset_it != token_offsets.end()) {
                 if (i == offset_it->first) {
diff --git a/src/index.cpp b/src/index.cpp
index a36a7adc..ee8debdf 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -995,6 +995,11 @@ void Index::search_candidates(const uint8_t & field_id,
 
             field_num_results += filtered_results_size;
 
+            /*if(filtered_results_size != 0) {
+                LOG(INFO) << size_t(field_id) << " - " << log_query.str() << ", filtered_results_size: " << filtered_results_size
+                          << ", popcount: " << (__builtin_popcount(token_bits) - 1);
+            }*/
+
             delete[] filtered_result_ids;
             delete[] result_ids;
         } else {
@@ -1695,13 +1700,18 @@ void Index::search(const std::vector<query_tokens_t>& field_query_tokens,
             auto& kvs = seq_id_kvs.second; // each `kv` can be from a different field
 
             std::sort(kvs.begin(), kvs.end(), Topster::is_greater);
+            kvs[0]->query_indices = new uint64_t[kvs.size() + 1];
+            kvs[0]->query_indices[0] = kvs.size();
 
-            // LOG(INFO) << "DOC ID: " << seq_id << ", score: " << kvs[0]->scores[kvs[0]->match_score_index];
+            //LOG(INFO) << "DOC ID: " << seq_id << ", score: " << kvs[0]->scores[kvs[0]->match_score_index];
 
             // to calculate existing aggregate scores across best matching fields
             spp::sparse_hash_map<uint8_t, KV*> existing_field_kvs;
-            for(const auto kv: kvs) {
-                existing_field_kvs.emplace(kv->field_id, kv);
+            for(size_t kv_i = 0; kv_i < kvs.size(); kv_i++) {
+                existing_field_kvs.emplace(kvs[kv_i]->field_id, kvs[kv_i]);
+                kvs[0]->query_indices[kv_i+1] = kvs[kv_i]->query_index;
+                /*LOG(INFO) << "kv_i: " << kv_i << ", kvs[kv_i]->query_index: " << kvs[kv_i]->query_index << ", "
+                          << "searched_query: " << searched_queries[kvs[kv_i]->query_index][0];*/
             }
 
             uint32_t token_bits = (uint32_t(1) << 31);      // top most bit set to guarantee atleast 1 bit set
diff --git a/test/collection_locale_test.cpp b/test/collection_locale_test.cpp
index c6f5b63a..75fe60b1 100644
--- a/test/collection_locale_test.cpp
+++ b/test/collection_locale_test.cpp
@@ -263,7 +263,7 @@ TEST_F(CollectionLocaleTest, SearchAgainstThaiTextExactMatch) {
     ASSERT_EQ("ติดกับดัก<mark>ราย</mark><mark>ได้</mark>ปานกลาง",
               results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
 
-    ASSERT_EQ("ข้อมูลรายคนหรือ<mark>ราย</mark>บริษัทในการเชื่อมโยงส่วน<mark>ได้</mark>ส่วนเสีย",
+    ASSERT_EQ("ข้อมูล<mark>ราย</mark>คนหรือ<mark>ราย</mark>บริษัทในการเชื่อมโยงส่วน<mark>ได้</mark>ส่วนเสีย",
               results["hits"][1]["highlights"][0]["snippet"].get<std::string>());
 
 }
diff --git a/test/collection_manager_test.cpp b/test/collection_manager_test.cpp
index 8b3f507a..ac87bfc4 100644
--- a/test/collection_manager_test.cpp
+++ b/test/collection_manager_test.cpp
@@ -232,7 +232,6 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
     std::vector<std::string> facets;
 
     nlohmann::json results = collection1->search("thomas", search_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
-    LOG(INFO) << results;
     ASSERT_EQ(4, results["hits"].size());
 
     std::unordered_map<std::string, field> schema = collection1->get_schema();
diff --git a/test/collection_specific_test.cpp b/test/collection_specific_test.cpp
index c1855479..bf74f410 100644
--- a/test/collection_specific_test.cpp
+++ b/test/collection_specific_test.cpp
@@ -648,3 +648,75 @@ TEST_F(CollectionSpecificTest, HandleBadCharactersInStringGracefully) {
 
     collectionManager.drop_collection("coll1");
 }
+
+TEST_F(CollectionSpecificTest, HighlightSecondaryFieldWithPrefixMatch) {  // searches "function" over title + description and checks the snippets of both fields for doc "0"
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("description", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
+
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["title"] = "Functions and Equations";  // title holds the query word only as the prefix of "Functions"
+    doc1["description"] = "Use a function to solve an equation.";  // description holds the exact query word
+    doc1["points"] = 100;
+
+    nlohmann::json doc2;
+    doc2["id"] = "1";
+    doc2["title"] = "Function of effort";
+    doc2["description"] = "Learn all about it.";  // no occurrence of the query word in this field
+    doc2["points"] = 100;
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
+
+    auto results = coll1->search("function", {"title", "description"}, "", {}, {}, {0}, 10,
+                                 1, FREQUENCY, {true, true},  // presumably the per-field prefix flags -- confirm against Collection::search
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {1, 1}).get();
+
+    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
+
+    ASSERT_EQ(2, results["hits"][0]["highlights"].size());  // two highlight entries; their snippets are checked below
+
+    ASSERT_EQ("<mark>Functions</mark> and Equations",  // the whole word "Functions" is marked, not just the queried prefix
+              results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    ASSERT_EQ("Use a <mark>function</mark> to solve an equation.",
+              results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionSpecificTest, HighlightWithDropTokens) {  // multi-word query against one long description; checks which words end up marked
+    std::vector<field> fields = {field("description", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
+
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["description"] = "HPE Aruba AP-575 802.11ax Wireless Access Point - TAA Compliant - 2.40 GHz, "
+                          "5 GHz - MIMO Technology - 1 x Network (RJ-45) - Gigabit Ethernet - Bluetooth 5";
+    doc1["points"] = 100;
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+
+    auto results = coll1->search("HPE Aruba AP-575 Technology Gigabit Bluetooth 5", {"description"}, "", {}, {}, {0}, 10,  // every query word occurs in the description
+                                 1, FREQUENCY, {true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "description", 40, {}, {}, {}, 0,  // NOTE(review): "description" presumably requests full-field highlighting -- confirm against Collection::search
+                                 "<mark>", "</mark>").get();
+
+    ASSERT_EQ(1, results["hits"][0]["highlights"].size());
+    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
+
+    ASSERT_EQ("<mark>HPE</mark> <mark>Aruba</mark> <mark>AP-575</mark> 802.11ax Wireless Access Point - "  // expected snippet is the complete description text
+              "TAA Compliant - 2.40 GHz, <mark>5</mark> GHz - MIMO <mark>Technology</mark> - 1 x Network (RJ-45) - "  // "5" is marked in "5 GHz" ...
+              "<mark>Gigabit</mark> Ethernet - <mark>Bluetooth</mark> <mark>5</mark>",  // ... and again after "Bluetooth"
+              results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    collectionManager.drop_collection("coll1");
+}
diff --git a/test/collection_test.cpp b/test/collection_test.cpp
index 0e9221a2..8f573e6e 100644
--- a/test/collection_test.cpp
+++ b/test/collection_test.cpp
@@ -746,23 +746,23 @@ TEST_F(CollectionTest, ArrayStringFieldHighlight) {
         ASSERT_STREQ(id.c_str(), result_id.c_str());
     }
 
-    ASSERT_EQ(4, results["hits"][0]["highlights"][0].size());
-    ASSERT_STREQ(results["hits"][0]["highlights"][0]["field"].get<std::string>().c_str(), "tags");
-    ASSERT_EQ(2, results["hits"][0]["highlights"][0]["snippets"].size());
-    ASSERT_STREQ("<mark>truth</mark>", results["hits"][0]["highlights"][0]["snippets"][0].get<std::string>().c_str());
-    ASSERT_STREQ("plain <mark>truth</mark>", results["hits"][0]["highlights"][0]["snippets"][1].get<std::string>().c_str());
-    ASSERT_EQ(2, results["hits"][0]["highlights"][0]["matched_tokens"].size());
-    ASSERT_STREQ("truth", results["hits"][0]["highlights"][0]["matched_tokens"][0][0].get<std::string>().c_str());
-    ASSERT_STREQ("truth", results["hits"][0]["highlights"][0]["matched_tokens"][1][0].get<std::string>().c_str());
-    ASSERT_EQ(2, results["hits"][0]["highlights"][0]["indices"].size());
-    ASSERT_EQ(1, results["hits"][0]["highlights"][0]["indices"][0]);
-    ASSERT_EQ(2, results["hits"][0]["highlights"][0]["indices"][1]);
+    ASSERT_EQ(3, results["hits"][0]["highlights"][0].size());
+    ASSERT_STREQ("title", results["hits"][0]["highlights"][0]["field"].get<std::string>().c_str());
+    ASSERT_STREQ("Plain <mark>Truth</mark>", results["hits"][0]["highlights"][0]["snippet"].get<std::string>().c_str());
+    ASSERT_EQ(1, results["hits"][0]["highlights"][0]["matched_tokens"].size());
+    ASSERT_STREQ("Truth", results["hits"][0]["highlights"][0]["matched_tokens"][0].get<std::string>().c_str());
 
-    ASSERT_EQ(3, results["hits"][0]["highlights"][1].size());
-    ASSERT_STREQ("title", results["hits"][0]["highlights"][1]["field"].get<std::string>().c_str());
-    ASSERT_STREQ("Plain <mark>Truth</mark>", results["hits"][0]["highlights"][1]["snippet"].get<std::string>().c_str());
-    ASSERT_EQ(1, results["hits"][0]["highlights"][1]["matched_tokens"].size());
-    ASSERT_STREQ("Truth", results["hits"][0]["highlights"][1]["matched_tokens"][0].get<std::string>().c_str());
+    ASSERT_EQ(4, results["hits"][0]["highlights"][1].size());
+    ASSERT_STREQ(results["hits"][0]["highlights"][1]["field"].get<std::string>().c_str(), "tags");
+    ASSERT_EQ(2, results["hits"][0]["highlights"][1]["snippets"].size());
+    ASSERT_STREQ("<mark>truth</mark>", results["hits"][0]["highlights"][1]["snippets"][0].get<std::string>().c_str());
+    ASSERT_STREQ("plain <mark>truth</mark>", results["hits"][0]["highlights"][1]["snippets"][1].get<std::string>().c_str());
+    ASSERT_EQ(2, results["hits"][0]["highlights"][1]["matched_tokens"].size());
+    ASSERT_STREQ("truth", results["hits"][0]["highlights"][1]["matched_tokens"][0][0].get<std::string>().c_str());
+    ASSERT_STREQ("truth", results["hits"][0]["highlights"][1]["matched_tokens"][1][0].get<std::string>().c_str());
+    ASSERT_EQ(2, results["hits"][0]["highlights"][1]["indices"].size());
+    ASSERT_EQ(1, results["hits"][0]["highlights"][1]["indices"][0]);
+    ASSERT_EQ(2, results["hits"][0]["highlights"][1]["indices"][1]);
 
     ASSERT_EQ(3, results["hits"][1]["highlights"][0].size());
     ASSERT_STREQ("title", results["hits"][1]["highlights"][0]["field"].get<std::string>().c_str());
@@ -2456,15 +2456,15 @@ TEST_F(CollectionTest, SearchHighlightFieldFully) {
                         spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "title, tags").get();
 
     ASSERT_EQ(2, res["hits"][0]["highlights"].size());
-    ASSERT_EQ("<mark>LAZY</mark>", res["hits"][0]["highlights"][0]["values"][0].get<std::string>());
     ASSERT_EQ("The quick brown fox jumped over the <mark>lazy</mark> dog and ran straight to the forest to sleep.",
-                 res["hits"][0]["highlights"][1]["value"].get<std::string>());
+              res["hits"][0]["highlights"][0]["value"].get<std::string>());
+    ASSERT_EQ(1, res["hits"][0]["highlights"][0]["matched_tokens"].size());
+    ASSERT_STREQ("lazy", res["hits"][0]["highlights"][0]["matched_tokens"][0].get<std::string>().c_str());
 
-    ASSERT_EQ(1, res["hits"][0]["highlights"][1]["matched_tokens"].size());
-    ASSERT_STREQ("lazy", res["hits"][0]["highlights"][1]["matched_tokens"][0].get<std::string>().c_str());
-
-    ASSERT_EQ(1, res["hits"][0]["highlights"][0]["values"][0].size());
-    ASSERT_STREQ("<mark>LAZY</mark>", res["hits"][0]["highlights"][0]["values"][0].get<std::string>().c_str());
+    ASSERT_EQ(1, res["hits"][0]["highlights"][1]["values"].size());
+    ASSERT_EQ("<mark>LAZY</mark>", res["hits"][0]["highlights"][1]["values"][0].get<std::string>());
+    ASSERT_EQ(1, res["hits"][0]["highlights"][1]["snippets"].size());
+    ASSERT_EQ("<mark>LAZY</mark>", res["hits"][0]["highlights"][1]["snippets"][0].get<std::string>());
 
     // excluded fields should not be returned in highlights section
     spp::sparse_hash_set<std::string> excluded_fields = {"tags"};
@@ -3146,13 +3146,17 @@ TEST_F(CollectionTest, MultiFieldRelevance5) {
     ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
     ASSERT_STREQ("2", results["hits"][2]["document"]["id"].get<std::string>().c_str());
 
-    ASSERT_EQ(1, results["hits"][0]["highlights"].size());
-    ASSERT_EQ("country", results["hits"][0]["highlights"][0]["field"].get<std::string>());
-    ASSERT_EQ("<mark>Canada</mark>", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
+    ASSERT_EQ("field_a", results["hits"][0]["highlights"][0]["field"].get<std::string>());
+    ASSERT_EQ("<mark>Canadia</mark>", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+    ASSERT_EQ("country", results["hits"][0]["highlights"][1]["field"].get<std::string>());
+    ASSERT_EQ("<mark>Canada</mark>", results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
 
-    ASSERT_EQ(1, results["hits"][1]["highlights"].size());
-    ASSERT_EQ("company_name", results["hits"][1]["highlights"][0]["field"].get<std::string>());
-    ASSERT_EQ("<mark>Canaida</mark> Corp", results["hits"][1]["highlights"][0]["snippet"].get<std::string>());
+    ASSERT_EQ(2, results["hits"][1]["highlights"].size());
+    ASSERT_EQ("field_a", results["hits"][1]["highlights"][0]["field"].get<std::string>());
+    ASSERT_EQ("<mark>Canadoo</mark>", results["hits"][1]["highlights"][0]["snippet"].get<std::string>());
+    ASSERT_EQ("company_name", results["hits"][1]["highlights"][1]["field"].get<std::string>());
+    ASSERT_EQ("<mark>Canaida</mark> Corp", results["hits"][1]["highlights"][1]["snippet"].get<std::string>());
 
     ASSERT_EQ(1, results["hits"][2]["highlights"].size());
     ASSERT_EQ("field_a", results["hits"][2]["highlights"][0]["field"].get<std::string>());
@@ -3317,7 +3321,7 @@ TEST_F(CollectionTest, MultiFieldHighlighting) {
 
     ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
 
-    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
+    ASSERT_EQ(3, results["hits"][0]["highlights"].size());
     ASSERT_EQ("name", results["hits"][0]["highlights"][0]["field"].get<std::string>());
     ASSERT_EQ("Best Wireless Vehicle <mark>Charger</mark>",
               results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
@@ -3326,6 +3330,9 @@ TEST_F(CollectionTest, MultiFieldHighlighting) {
     ASSERT_EQ("Easily replenish your cell phone with this wireless <mark>charger.</mark>",
               results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
 
+    ASSERT_EQ("categories", results["hits"][0]["highlights"][2]["field"].get<std::string>());
+    ASSERT_EQ("Car <mark>Chargers</mark>", results["hits"][0]["highlights"][2]["snippets"][0].get<std::string>());
+
     results = coll1->search("John With Denver",
                             {"description"}, "", {}, {}, {0}, 10, 1, FREQUENCY,
                             {true}, 1, spp::sparse_hash_set<std::string>(),