From 309e973de593ae18b6f23a350256bf7a278edb5e Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Sat, 4 Feb 2023 21:05:14 +0530
Subject: [PATCH 1/5] Properly release buffered export body.

---
 src/core_api.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/core_api.cpp b/src/core_api.cpp
index 389d3904..43ce94df 100644
--- a/src/core_api.cpp
+++ b/src/core_api.cpp
@@ -697,7 +697,7 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::share
     if(export_state->it != nullptr) {
         rocksdb::Iterator* it = export_state->it;
         size_t batch_counter = 0;
-        res->body.clear();
+        std::string().swap(res->body);
 
         while(it->Valid() && it->key().ToString().compare(0, seq_id_prefix.size(), seq_id_prefix) == 0) {
             if(export_state->include_fields.empty() && export_state->exclude_fields.empty()) {

From f40637fe643d680792d7953a7d786f352ff03efb Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Thu, 2 Feb 2023 16:48:18 +0530
Subject: [PATCH 2/5] Enabling exhaustive search should automatically drop
 tokens.

---
 src/index.cpp                          |  2 +-
 test/collection_specific_more_test.cpp | 28 ++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/index.cpp b/src/index.cpp
index 94f084cd..f599eab6 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -2708,7 +2708,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
 
         // gather up both original query and synonym queries and do drop tokens
 
-        if (all_result_ids_len < drop_tokens_threshold) {
+        if (exhaustive_search || all_result_ids_len < drop_tokens_threshold) {
             for (size_t qi = 0; qi < all_queries.size(); qi++) {
                 auto& orig_tokens = all_queries[qi];
                 size_t num_tokens_dropped = 0;
diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp
index 608c5fea..3527bbec 100644
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@@ -1782,6 +1782,34 @@ TEST_F(CollectionSpecificMoreTest, SearchCutoffTest) {
     ASSERT_EQ(408, coll_op.code());
 }
 
+TEST_F(CollectionSpecificMoreTest, ExhaustiveSearchWithoutExplicitDropTokens) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "fields": [
+            {"name": "title", "type": "string"}
+        ]
+    })"_json;
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    nlohmann::json doc;
+    doc["title"] = "alpha beta gamma";
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    doc["title"] = "alpha";
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    bool exhaustive_search = true;
+    size_t drop_tokens_threshold = 1;
+
+    auto res = coll1->search("alpha beta", {"title"}, "", {}, {}, {0}, 3, 1, FREQUENCY, {false}, drop_tokens_threshold,
+                                 spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000, true, false, true, "", exhaustive_search).get();
+
+    ASSERT_EQ(2, res["hits"].size());
+}
+
 TEST_F(CollectionSpecificMoreTest, CrossFieldTypoAndPrefixWithWeights) {
     nlohmann::json schema = R"({
             "name": "coll1",

From 2c8b8d6af143fc382cca3d911efb4b317761a036 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Fri, 3 Feb 2023 18:34:51 +0530
Subject: [PATCH 3/5] Add test for eval expression validation.

---
 src/collection.cpp               | 4 ++++
 test/collection_sorting_test.cpp | 9 +++++++++
 2 files changed, 13 insertions(+)

diff --git a/src/collection.cpp b/src/collection.cpp
index 09e539a9..5a1d8a54 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -586,6 +586,10 @@ Option<bool> Collection::validate_and_standardize_sort_fields(const std::vector<
                 const std::string& filter_exp = sort_field_std.name.substr(paran_start + 1,
                                                                            sort_field_std.name.size() - paran_start -
                                                                            2);
+                if(filter_exp.empty()) {
+                    return Option<bool>(400, "The eval expression in sort_by is empty.");
+                }
+
                 Option<bool> parse_filter_op = filter::parse_filter_query(filter_exp, search_schema,
                                                                           store, "", sort_field_std.eval.filter_tree_root);
                 if(!parse_filter_op.ok()) {
diff --git a/test/collection_sorting_test.cpp b/test/collection_sorting_test.cpp
index 80ffbaf5..afd678c1 100644
--- a/test/collection_sorting_test.cpp
+++ b/test/collection_sorting_test.cpp
@@ -2013,6 +2013,15 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingWildcard) {
     ASSERT_FALSE(res_op.ok());
     ASSERT_EQ("Error parsing eval expression in sort_by clause.", res_op.error());
 
+    // when eval condition is empty
+    sort_fields = {
+        sort_by("_eval()", "DESC"),
+        sort_by("points", "DESC"),
+    };
+    res_op = coll1->search("*", {"title"}, "", {}, sort_fields, {2}, 10, 1, FREQUENCY, {true}, 10);
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("The eval expression in sort_by is empty.", res_op.error());
+
     // more bad syntax!
     sort_fields = {
         sort_by(")", "DESC"),

From 0579398993311a9278f08ed0d0f03b96c32fbc0c Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Sun, 5 Feb 2023 10:58:40 +0530
Subject: [PATCH 4/5] Allow null values for optional nested fields.

---
 src/field.cpp                          |  5 +++++
 test/collection_nested_fields_test.cpp | 26 ++++++++++++++++++++++----
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/field.cpp b/src/field.cpp
index fb334dc4..97e385af 100644
--- a/src/field.cpp
+++ b/src/field.cpp
@@ -721,6 +721,11 @@ Option<bool> field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, cons
         // end of path: check if obj matches expected type
         std::string detected_type;
         if(!field::get_type(obj, detected_type)) {
+            if(obj.is_null() && the_field.optional) {
+                // null values are allowed only if field is optional
+                return Option<bool>(false);
+            }
+
             return Option<bool>(400, "Field `" + the_field.name + "` has an incorrect type.");
         }
 
diff --git a/test/collection_nested_fields_test.cpp b/test/collection_nested_fields_test.cpp
index 2fa978e5..997026a7 100644
--- a/test/collection_nested_fields_test.cpp
+++ b/test/collection_nested_fields_test.cpp
@@ -1390,7 +1390,8 @@ TEST_F(CollectionNestedFieldsTest, ExplicitSchemaOptionalFieldValidation) {
         "fields": [
           {"name": "details", "type": "object", "optional": true },
           {"name": "company.name", "type": "string", "optional": true },
-          {"name": "locations", "type": "object[]", "optional": true }
+          {"name": "locations", "type": "object[]", "optional": true },
+          {"name": "blocks.text.description", "type": "string[]", "optional": true }
         ]
     })"_json;
 
@@ -1398,14 +1399,31 @@ TEST_F(CollectionNestedFieldsTest, ExplicitSchemaOptionalFieldValidation) {
     ASSERT_TRUE(op.ok());
     Collection* coll1 = op.get();
 
-    // no optional field is present and that should be allowed
+    // when a nested field is null it should be allowed
     auto doc1 = R"({
-        "foo": "bar"
+        "company": {"name": null}
     })"_json;
 
     auto add_op = coll1->add(doc1.dump(), CREATE);
     ASSERT_TRUE(add_op.ok());
 
+    // check the same with nested array type
+
+    doc1 = R"({
+        "blocks": {"text": [{"description": null}]}
+    })"_json;
+
+    add_op = coll1->add(doc1.dump(), CREATE);
+    ASSERT_TRUE(add_op.ok());
+
+    // no optional field is present and that should be allowed
+    doc1 = R"({
+        "foo": "bar"
+    })"_json;
+
+    add_op = coll1->add(doc1.dump(), CREATE);
+    ASSERT_TRUE(add_op.ok());
+
     // some parts of an optional field is present in a subsequent doc indexed
     auto doc2 = R"({
         "details": {"name": "foo"}
@@ -1421,7 +1439,7 @@ TEST_F(CollectionNestedFieldsTest, ExplicitSchemaOptionalFieldValidation) {
 
     // check fields and their properties
     auto coll_fields = coll1->get_fields();
-    ASSERT_EQ(5, coll_fields.size());
+    ASSERT_EQ(6, coll_fields.size());
     for(auto& coll_field : coll_fields) {
         ASSERT_TRUE(coll_field.optional);
     }

From 86535e24aa8aa1523480336ae57e7fdf885f0bb2 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Sun, 5 Feb 2023 13:20:02 +0530
Subject: [PATCH 5/5] Improve error message for nested array object string
 field.

---
 src/index.cpp                          |  8 ++++++++
 test/collection_nested_fields_test.cpp | 25 +++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/src/index.cpp b/src/index.cpp
index f599eab6..c3db97b7 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -5557,6 +5557,10 @@ Option<uint32_t> Index::coerce_string(const DIRTY_VALUES& dirty_values, const st
     else {
         if(dirty_values == DIRTY_VALUES::COERCE_OR_DROP) {
             if(!a_field.optional) {
+                if(a_field.nested && item.is_array()) {
+                    return Option<>(400, "Field `" + field_name + "` has an incorrect type. "
+                                      "Hint: field inside an array of objects must be an array type as well.");
+                }
                 return Option<>(400, "Field `" + field_name  + "` must be " + suffix + " string.");
             }
 
@@ -5568,6 +5572,10 @@ Option<uint32_t> Index::coerce_string(const DIRTY_VALUES& dirty_values, const st
             }
         } else {
             // COERCE_OR_REJECT / non-optional + DROP
+            if(a_field.nested && item.is_array()) {
+                return Option<>(400, "Field `" + field_name + "` has an incorrect type. "
+                                      "Hint: field inside an array of objects must be an array type as well.");
+            }
             return Option<>(400, "Field `" + field_name  + "` must be " + suffix + " string.");
         }
     }
diff --git a/test/collection_nested_fields_test.cpp b/test/collection_nested_fields_test.cpp
index 997026a7..87a58145 100644
--- a/test/collection_nested_fields_test.cpp
+++ b/test/collection_nested_fields_test.cpp
@@ -1445,6 +1445,31 @@ TEST_F(CollectionNestedFieldsTest, ExplicitSchemaOptionalFieldValidation) {
     }
 }
 
+TEST_F(CollectionNestedFieldsTest, ExplicitSchemaForNestedArrayTypeValidation) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "enable_nested_fields": true,
+        "fields": [
+            {"name": "blocks.text", "type": "object[]"},
+            {"name": "blocks.text.description", "type": "string"}
+        ]
+    })"_json;
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection* coll1 = op.get();
+
+    auto doc1 = R"({
+        "blocks": {"text": [{"description": "Hello world."}]}
+    })"_json;
+
+    auto add_op = coll1->add(doc1.dump(), CREATE);
+
+    ASSERT_FALSE(add_op.ok());
+    ASSERT_EQ("Field `blocks.text.description` has an incorrect type. "
+              "Hint: field inside an array of objects must be an array type as well.", add_op.error());
+}
+
 TEST_F(CollectionNestedFieldsTest, SortByNestedField) {
     nlohmann::json schema = R"({
         "name": "coll1",