From 10c1f4c5c1b87fc85b21352553a023e408c2721b Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Tue, 8 Aug 2023 11:39:18 +0530
Subject: [PATCH 01/16] Fix schema validation of non-optional, null nested
 values.

---
 include/field.h                        |   7 +-
 src/field.cpp                          |  53 ++++++++---
 test/collection_nested_fields_test.cpp | 125 +++++++++++++++++++++++++
 3 files changed, 169 insertions(+), 16 deletions(-)
diff --git a/include/field.h b/include/field.h
index 6b4ee937..d34d32ae 100644
--- a/include/field.h
+++ b/include/field.h
@@ -23,6 +23,7 @@ namespace field_types {
     static const std::string INT64 = "int64";
     static const std::string FLOAT = "float";
     static const std::string BOOL = "bool";
+    static const std::string NIL = "nil";
     static const std::string GEOPOINT = "geopoint";
     static const std::string STRING_ARRAY = "string[]";
     static const std::string INT32_ARRAY = "int32[]";
@@ -429,19 +430,19 @@ struct field {
                                                        std::vector<field>& fields_vec);
 
     static bool flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_array, bool has_obj_array,
-                            const field& the_field, const std::string& flat_name,
+                            bool is_update, const field& the_field, const std::string& flat_name,
                             const std::unordered_map<std::string, field>& dyn_fields,
                             std::unordered_map<std::string, field>& flattened_fields);
 
     static Option<bool> flatten_field(nlohmann::json& doc, nlohmann::json& obj, const field& the_field,
                                       std::vector<std::string>& path_parts, size_t path_index, bool has_array,
-                                      bool has_obj_array,
+                                      bool has_obj_array, bool is_update,
                                       const std::unordered_map<std::string, field>& dyn_fields,
                                       std::unordered_map<std::string, field>& flattened_fields);
 
     static Option<bool> flatten_doc(nlohmann::json& document, const tsl::htrie_map<char, field>& nested_fields,
                                     const std::unordered_map<std::string, field>& dyn_fields,
-                                    bool missing_is_ok, std::vector<field>& flattened_fields);
+                                    bool is_update, std::vector<field>& flattened_fields);
 
     static void compact_nested_fields(tsl::htrie_map<char, field>& nested_fields);
 };
diff --git a/src/field.cpp b/src/field.cpp
index bccd8da5..7d5e399c 100644
--- a/src/field.cpp
+++ b/src/field.cpp
@@ -824,18 +824,41 @@ Option<bool> field::json_field_to_field(bool enable_nested_fields, nlohmann::jso
 }
 
 bool field::flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_array, bool has_obj_array,
-                        const field& the_field, const std::string& flat_name,
+                        bool is_update, const field& the_field, const std::string& flat_name,
                         const std::unordered_map<std::string, field>& dyn_fields,
                         std::unordered_map<std::string, field>& flattened_fields) {
     if(value.is_object()) {
         has_obj_array = has_array;
-        for(const auto& kv: value.items()) {
-            flatten_obj(doc, kv.value(), has_array, has_obj_array, the_field, flat_name + "." + kv.key(),
-                        dyn_fields, flattened_fields);
+        auto it = value.begin();
+        while(it != value.end()) {
+            const std::string& child_field_name = flat_name + "." + it.key();
+            if(it.value().is_null()) {
+                if(has_array) {
+                    doc[child_field_name].push_back(nullptr);
+                } else {
+                    doc[child_field_name] = nullptr;
+                }
+
+                field flattened_field;
+                flattened_field.name = child_field_name;
+                flattened_field.type = field_types::NIL;
+                flattened_fields[child_field_name] = flattened_field;
+
+                if(!is_update) {
+                    // nulls are erased only for non-updates: the update code path requires and takes care of null values
+                    it = value.erase(it);
+                } else {
+                    it++;
+                }
+            } else {
+                flatten_obj(doc, it.value(), has_array, has_obj_array, is_update, the_field, child_field_name,
+                            dyn_fields, flattened_fields);
+                it++;
+            }
         }
     } else if(value.is_array()) {
         for(const auto& kv: value.items()) {
-            flatten_obj(doc, kv.value(), true, has_obj_array, the_field, flat_name, dyn_fields, flattened_fields);
+            flatten_obj(doc, kv.value(), true, has_obj_array, is_update, the_field, flat_name, dyn_fields, flattened_fields);
         }
     } else { // must be a primitive
         if(doc.count(flat_name) != 0 && flattened_fields.find(flat_name) == flattened_fields.end()) {
@@ -891,7 +914,7 @@ bool field::flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_arr
 
 Option<bool> field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, const field& the_field,
                                   std::vector<std::string>& path_parts, size_t path_index,
-                                  bool has_array, bool has_obj_array,
+                                  bool has_array, bool has_obj_array, bool is_update,
                                   const std::unordered_map<std::string, field>& dyn_fields,
                                   std::unordered_map<std::string, field>& flattened_fields) {
     if(path_index == path_parts.size()) {
@@ -946,7 +969,8 @@ Option<bool> field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, cons
 
         if(detected_type == the_field.type || is_numericaly_valid) {
             if(the_field.is_object()) {
-                flatten_obj(doc, obj, has_array, has_obj_array, the_field, the_field.name, dyn_fields, flattened_fields);
+                flatten_obj(doc, obj, has_array, has_obj_array, is_update, the_field, the_field.name,
+                            dyn_fields, flattened_fields);
             } else {
                 if(doc.count(the_field.name) != 0 && flattened_fields.find(the_field.name) == flattened_fields.end()) {
                     return Option<bool>(true);
@@ -989,7 +1013,7 @@ Option<bool> field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, cons
             for(auto& ele: it.value()) {
                 has_obj_array = has_obj_array || ele.is_object();
                 Option<bool> op = flatten_field(doc, ele, the_field, path_parts, path_index + 1, has_array,
-                                                has_obj_array, dyn_fields, flattened_fields);
+                                                has_obj_array, is_update, dyn_fields, flattened_fields);
                 if(!op.ok()) {
                     return op;
                 }
@@ -997,7 +1021,7 @@ Option<bool> field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, cons
             return Option<bool>(true);
         } else {
             return flatten_field(doc, it.value(), the_field, path_parts, path_index + 1, has_array, has_obj_array,
-                                 dyn_fields, flattened_fields);
+                                 is_update, dyn_fields, flattened_fields);
         }
     } {
         return Option<bool>(404, "Field `" + the_field.name + "` not found.");
@@ -1007,7 +1031,7 @@ Option<bool> field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, cons
 Option<bool> field::flatten_doc(nlohmann::json& document,
                                 const tsl::htrie_map<char, field>& nested_fields,
                                 const std::unordered_map<std::string, field>& dyn_fields,
-                                bool missing_is_ok, std::vector<field>& flattened_fields) {
+                                bool is_update, std::vector<field>& flattened_fields) {
 
     std::unordered_map<std::string, field> flattened_fields_map;
 
@@ -1021,12 +1045,12 @@ Option<bool> field::flatten_doc(nlohmann::json& document,
         }
 
         auto op = flatten_field(document, document, nested_field, field_parts, 0, false, false,
-                                dyn_fields, flattened_fields_map);
+                                is_update, dyn_fields, flattened_fields_map);
         if(op.ok()) {
             continue;
         }
 
-        if(op.code() == 404 && (missing_is_ok || nested_field.optional)) {
+        if(op.code() == 404 && (is_update || nested_field.optional)) {
             continue;
         } else {
             return op;
@@ -1036,7 +1060,10 @@ Option<bool> field::flatten_doc(nlohmann::json& document,
     document[".flat"] = nlohmann::json::array();
     for(auto& kv: flattened_fields_map) {
         document[".flat"].push_back(kv.second.name);
-        flattened_fields.push_back(kv.second);
+        if(kv.second.type != field_types::NIL) {
+            // not a real field so we won't add it
+            flattened_fields.push_back(kv.second);
+        }
     }
 
     return Option<bool>(true);
diff --git a/test/collection_nested_fields_test.cpp b/test/collection_nested_fields_test.cpp
index a2eef13d..6def5d1a 100644
--- a/test/collection_nested_fields_test.cpp
+++ b/test/collection_nested_fields_test.cpp
@@ -2560,6 +2560,131 @@ TEST_F(CollectionNestedFieldsTest, NullValuesWithExplicitSchema) {
     auto results = coll1->search("jack", {"name.first"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
     ASSERT_EQ(1, results["found"].get<size_t>());
     ASSERT_EQ(2, results["hits"][0]["document"].size());  // id, name
+    ASSERT_EQ(1, results["hits"][0]["document"]["name"].size());  // name.first
+    ASSERT_EQ("Jack", results["hits"][0]["document"]["name"]["first"].get<std::string>());
+}
+
+TEST_F(CollectionNestedFieldsTest, EmplaceWithNullValueOnRequiredField) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "enable_nested_fields": true,
+        "fields": [
+            {"name":"currency", "type":"object"},
+            {"name":"currency.eu", "type":"int32", "optional": false}
+        ]
+    })"_json;
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection *coll1 = op.get();
+
+    auto doc1 = R"({
+      "id": "0",
+      "currency": {
+        "eu": 12000
+      }
+    })"_json;
+
+    auto add_op = coll1->add(doc1.dump(), CREATE);
+    ASSERT_TRUE(add_op.ok());
+
+    // now update with null value -- should not be allowed
+    auto update_doc = R"({
+      "id": "0",
+      "currency": {
+        "eu": null
+      }
+    })"_json;
+
+    auto update_op = coll1->add(update_doc.dump(), EMPLACE);
+    ASSERT_FALSE(update_op.ok());
+    ASSERT_EQ("Field `currency.eu` must be an int32.", update_op.error());
+}
+
+TEST_F(CollectionNestedFieldsTest, EmplaceWithNullValueOnOptionalField) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "enable_nested_fields": true,
+        "fields": [
+            {"name":"currency", "type":"object"},
+            {"name":"currency.eu", "type":"int32", "optional": true}
+        ]
+    })"_json;
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection *coll1 = op.get();
+
+    auto doc1 = R"({
+      "id": "0",
+      "currency": {
+        "eu": 12000
+      }
+    })"_json;
+
+    auto add_op = coll1->add(doc1.dump(), CREATE);
+    ASSERT_TRUE(add_op.ok());
+
+    // now update with null value -- should be allowed since field is optional
+    auto update_doc = R"({
+      "id": "0",
+      "currency": {
+        "eu": null
+      }
+    })"_json;
+
+    auto update_op = coll1->add(update_doc.dump(), EMPLACE);
+    ASSERT_TRUE(update_op.ok());
+
+    // try to fetch the document to see the stored value
+    auto results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    ASSERT_EQ(2, results["hits"][0]["document"].size());  // id, currency
+    ASSERT_EQ(0, results["hits"][0]["document"]["currency"].size());
+}
+
+TEST_F(CollectionNestedFieldsTest, EmplaceWithMissingArrayValueOnOptionalField) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "enable_nested_fields": true,
+        "fields": [
+            {"name":"currency", "type":"object[]"},
+            {"name":"currency.eu", "type":"int32[]", "optional": true}
+        ]
+    })"_json;
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection *coll1 = op.get();
+
+    auto doc1 = R"({
+      "id": "0",
+      "currency": [
+        {"eu": 12000},
+        {"us": 10000}
+      ]
+    })"_json;
+
+    auto add_op = coll1->add(doc1.dump(), CREATE);
+    ASSERT_TRUE(add_op.ok());
+
+    // now update with the `eu` element missing (not null) -- should be allowed since field is optional
+    auto update_doc = R"({
+      "id": "0",
+      "currency": [
+        {"us": 10000}
+      ]
+    })"_json;
+
+    auto update_op = coll1->add(update_doc.dump(), EMPLACE);
+    ASSERT_TRUE(update_op.ok());
+
+    // try to fetch the document to see the stored value
+    auto results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    ASSERT_EQ(2, results["hits"][0]["document"].size());  // id, currency
+    ASSERT_EQ(1, results["hits"][0]["document"]["currency"].size());
+    ASSERT_EQ(10000, results["hits"][0]["document"]["currency"][0]["us"].get<uint32_t>());
 }
 
 TEST_F(CollectionNestedFieldsTest, UpdateNestedDocument) {

From b3f248bd934935d6b34ccd2439b19a639ed1fb86 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Tue, 8 Aug 2023 20:24:15 +0530
Subject: [PATCH 02/16] Handle emplace + null values.

---
 src/validator.cpp                      |  2 +-
 test/collection_nested_fields_test.cpp | 15 ++++++++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/validator.cpp b/src/validator.cpp
index 51a7d19c..f814c923 100644
--- a/src/validator.cpp
+++ b/src/validator.cpp
@@ -626,7 +626,7 @@ Option<uint32_t> validator_t::validate_index_in_memory(nlohmann::json& document,
             continue;
         }
 
-        if((a_field.optional || op == UPDATE || op == EMPLACE) && document.count(field_name) == 0) {
+        if((a_field.optional || op == UPDATE || (op == EMPLACE && is_update)) && document.count(field_name) == 0) {
             continue;
         }
 
diff --git a/test/collection_nested_fields_test.cpp b/test/collection_nested_fields_test.cpp
index 6def5d1a..98a94f37 100644
--- a/test/collection_nested_fields_test.cpp
+++ b/test/collection_nested_fields_test.cpp
@@ -2578,6 +2578,19 @@ TEST_F(CollectionNestedFieldsTest, EmplaceWithNullValueOnRequiredField) {
     ASSERT_TRUE(op.ok());
     Collection *coll1 = op.get();
 
+    auto doc_with_null = R"({
+      "id": "0",
+      "currency": {
+        "eu": null
+      }
+    })"_json;
+
+    auto add_op = coll1->add(doc_with_null.dump(), EMPLACE);
+    ASSERT_FALSE(add_op.ok());
+
+    add_op = coll1->add(doc_with_null.dump(), CREATE);
+    ASSERT_FALSE(add_op.ok());
+
     auto doc1 = R"({
       "id": "0",
       "currency": {
@@ -2585,7 +2598,7 @@ TEST_F(CollectionNestedFieldsTest, EmplaceWithNullValueOnRequiredField) {
       }
     })"_json;
 
-    auto add_op = coll1->add(doc1.dump(), CREATE);
+    add_op = coll1->add(doc1.dump(), CREATE);
     ASSERT_TRUE(add_op.ok());
 
     // now update with null value -- should not be allowed

From 379604cad167659d9441e760da98840ea0eef9dc Mon Sep 17 00:00:00 2001
From: ozanarmagan <o.armagan2020@gtu.edu.tr>
Date: Tue, 8 Aug 2023 18:34:07 +0300
Subject: [PATCH 03/16] Fix wrong hybrid search text match score

---
 include/topster.h                      |  4 ++
 src/collection.cpp                     |  4 +-
 src/index.cpp                          |  3 ++
 test/collection_specific_more_test.cpp | 52 ++++++++++++++++++++++++++
 4 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/include/topster.h b/include/topster.h
index b0b8f125..c7378f2b 100644
--- a/include/topster.h
+++ b/include/topster.h
@@ -31,6 +31,10 @@ struct KV {
         this->scores[0] = scores[0];
         this->scores[1] = scores[1];
         this->scores[2] = scores[2];
+
+        if(match_score_index >= 0) {
+            this->text_match_score = scores[match_score_index];
+        }
     }
 
     KV() = default;
diff --git a/src/collection.cpp b/src/collection.cpp
index e60ffe41..88880b51 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1957,10 +1957,10 @@ Option<nlohmann::json> Collection::search(std::string  raw_query,
             if(field_order_kv->match_score_index == CURATED_RECORD_IDENTIFIER) {
                 wrapper_doc["curated"] = true;
             } else if(field_order_kv->match_score_index >= 0) {
-                wrapper_doc["text_match"] = field_order_kv->scores[field_order_kv->match_score_index];
+                wrapper_doc["text_match"] = field_order_kv->text_match_score;
                 wrapper_doc["text_match_info"] = nlohmann::json::object();
                 populate_text_match_info(wrapper_doc["text_match_info"],
-                                        field_order_kv->scores[field_order_kv->match_score_index], match_type);
+                                        field_order_kv->text_match_score, match_type);
                 if(!vector_query.field_name.empty()) {
                     wrapper_doc["hybrid_search_info"] = nlohmann::json::object();
                     wrapper_doc["hybrid_search_info"]["rank_fusion_score"] = Index::int64_t_to_float(field_order_kv->scores[field_order_kv->match_score_index]);
diff --git a/src/index.cpp b/src/index.cpp
index 7192faba..05444b73 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -3204,6 +3204,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
                         auto result = result_it->second;
                         // old_score + (1 / rank_of_document) * WEIGHT)
                         result->vector_distance = vec_result.second;
+                        result->text_match_score  = result->scores[result->match_score_index];
                         int64_t match_score = float_to_int64_t(
                                 (int64_t_to_float(result->scores[result->match_score_index])) +
                                 ((1.0 / (res_index + 1)) * VECTOR_SEARCH_WEIGHT));
@@ -3225,6 +3226,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
                         int64_t match_score_index = -1;
                         compute_sort_scores(sort_fields_std, sort_order, field_values, geopoint_indices, doc_id, 0, match_score, scores, match_score_index, vec_result.second);
                         KV kv(searched_queries.size(), doc_id, doc_id, match_score_index, scores);
+                        kv.text_match_score = 0;
                         kv.vector_distance = vec_result.second;
                         topster->add(&kv);
                         vec_search_ids.push_back(doc_id);
@@ -4154,6 +4156,7 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
         KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
         if(match_score_index != -1) {
             kv.scores[match_score_index] = aggregated_score;
+            kv.text_match_score = aggregated_score;
         }
 
         int ret = topster->add(&kv);
diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp
index e3f82f69..3e5b43a7 100644
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@@ -2530,4 +2530,56 @@ TEST_F(CollectionSpecificMoreTest, ApproxFilterMatchCount) {
 
     delete filter_tree_root;
     collectionManager.drop_collection("Collection");
+}
+
+TEST_F(CollectionSpecificMoreTest, HybridSearchTextMatchInfo) {
+    auto schema_json =
+            R"({
+                "name": "Products",
+                "fields": [
+                    {"name": "product_id", "type": "string"},
+                    {"name": "product_name", "type": "string", "infix": true},
+                    {"name": "product_description", "type": "string"},
+                    {"name": "embedding", "type":"float[]", "embed":{"from": ["product_description"], "model_config": {"model_name": "ts/e5-small"}}}
+                ]
+            })"_json;
+    std::vector<nlohmann::json> documents = {
+            R"({
+                "product_id": "product_a",
+                "product_name": "shampoo",
+                "product_description": "Our new moisturizing shampoo is perfect for those with dry or damaged hair."
+            })"_json,
+            R"({
+                "product_id": "product_b",
+                "product_name": "soap",
+                "product_description": "Introducing our all-natural, organic soap bar made with essential oils and botanical ingredients."
+            })"_json
+    };
+
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    auto coll1 = collection_create_op.get();
+    auto results = coll1->search("natural products", {"product_name", "embedding"},
+                                 "", {}, {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 0, spp::sparse_hash_set<std::string>()).get();
+
+    ASSERT_EQ(2, results["hits"].size());
+
+    // It's a hybrid search with only vector match
+    ASSERT_EQ("0", results["hits"][0]["text_match_info"]["score"].get<std::string>());
+    ASSERT_EQ("0", results["hits"][1]["text_match_info"]["score"].get<std::string>());
+
+    ASSERT_EQ(0, results["hits"][0]["text_match_info"]["fields_matched"].get<size_t>());
+    ASSERT_EQ(0, results["hits"][1]["text_match_info"]["fields_matched"].get<size_t>());
+
+    ASSERT_EQ(0, results["hits"][0]["text_match_info"]["tokens_matched"].get<size_t>());
+    ASSERT_EQ(0, results["hits"][1]["text_match_info"]["tokens_matched"].get<size_t>());
 }
\ No newline at end of file

From a99929f05fa09b13e060c3cc1caede29f61917b1 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Wed, 9 Aug 2023 19:02:36 +0530
Subject: [PATCH 04/16] Address change in streaming behavior of h2o on http/2.

_req->proceed_req is not 1 when http2 is used for chunks that follow the first chunk.
---
 src/http_server.cpp | 10 ++++------
 src/raft_server.cpp |  4 ++--
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/http_server.cpp b/src/http_server.cpp
index 9fbfb2a5..50391ed8 100644
--- a/src/http_server.cpp
+++ b/src/http_server.cpp
@@ -569,13 +569,11 @@ int HttpServer::async_req_cb(void *ctx, int is_end_stream) {
     bool async_req = custom_generator->rpath->async_req;
     bool is_http_v1 = (0x101 <= request->_req->version && request->_req->version < 0x200);
 
-    /*
-    LOG(INFO) << "async_req_cb, chunk.len=" << chunk.len
+    /*LOG(INFO) << "async_req_cb, chunk.len=" << chunk.len
               << ", is_http_v1: " << is_http_v1
-              << ", request->req->entity.len=" << request->req->entity.len
-              << ", content_len: " << request->req->content_length
-              << ", is_end_stream=" << is_end_stream;
-    */
+              << ", request->req->entity.len=" << request->_req->entity.len
+              << ", content_len: " << request->_req->content_length
+              << ", is_end_stream=" << is_end_stream;*/
 
     // disallow specific curl clients from using import call via http2
     // detects: https://github.com/curl/curl/issues/1410
diff --git a/src/raft_server.cpp b/src/raft_server.cpp
index 48584937..f33a6518 100644
--- a/src/raft_server.cpp
+++ b/src/raft_server.cpp
@@ -254,7 +254,7 @@ void ReplicationState::write_to_leader(const std::shared_ptr<http_req>& request,
         // Handle no leader scenario
         LOG(ERROR) << "Rejecting write: could not find a leader.";
 
-        if(request->_req->proceed_req && response->proxied_stream) {
+        if(response->proxied_stream) {
             // streaming in progress: ensure graceful termination (cannot start response again)
             LOG(ERROR) << "Terminating streaming request gracefully.";
             response->is_alive = false;
@@ -267,7 +267,7 @@ void ReplicationState::write_to_leader(const std::shared_ptr<http_req>& request,
         return message_dispatcher->send_message(HttpServer::STREAM_RESPONSE_MESSAGE, req_res);
     }
 
-    if (request->_req->proceed_req && response->proxied_stream) {
+    if (response->proxied_stream) {
         // indicates async request body of in-flight request
         //LOG(INFO) << "Inflight proxied request, returning control to caller, body_size=" << request->body.size();
         request->notify();

From 64ec0fea41b2840ec91b33989cf2a1331807c5f4 Mon Sep 17 00:00:00 2001
From: ozanarmagan <o.armagan2020@gtu.edu.tr>
Date: Thu, 10 Aug 2023 09:52:34 +0300
Subject: [PATCH 05/16] Fix search results of semantic search

---
 src/collection.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/collection.cpp b/src/collection.cpp
index 88880b51..1fbce5cf 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1073,7 +1073,7 @@ Option<bool> Collection::extract_field_name(const std::string& field_name,
     return Option<bool>(true);
 }
 
-Option<nlohmann::json> Collection::search(std::string  raw_query,
+Option<nlohmann::json> Collection::search(std::string raw_query,
                                   const std::vector<std::string>& raw_search_fields,
                                   const std::string & filter_query, const std::vector<std::string>& facet_fields,
                                   const std::vector<sort_by> & sort_fields, const std::vector<uint32_t>& num_typos,
@@ -1201,6 +1201,7 @@ Option<nlohmann::json> Collection::search(std::string  raw_query,
     std::vector<std::string> processed_search_fields;
     std::vector<uint32_t> query_by_weights;
     size_t num_embed_fields = 0;
+    std::string query = raw_query;
 
     for(size_t i = 0; i < raw_search_fields.size(); i++) {
         const std::string& field_name = raw_search_fields[i];
@@ -1289,6 +1290,11 @@ Option<nlohmann::json> Collection::search(std::string  raw_query,
         }
     }
 
+    // Set query to * if it is a semantic search (vector query with no processed search fields)
+    if(!vector_query.field_name.empty() && processed_search_fields.empty()) {
+        query = "*";
+    }
+
     if(!vector_query.field_name.empty() && vector_query.values.empty() && num_embed_fields == 0) {
         std::string error = "Vector query could not find any embedded fields.";
         return Option<nlohmann::json>(400, error);
@@ -1444,7 +1450,7 @@ Option<nlohmann::json> Collection::search(std::string  raw_query,
     size_t max_hits = DEFAULT_TOPSTER_SIZE;
 
     // ensure that `max_hits` never exceeds number of documents in collection
-    if(search_fields.size() <= 1 || raw_query == "*") {
+    if(search_fields.size() <= 1 || query == "*") {
         max_hits = std::min(std::max(fetch_size, max_hits), get_num_documents());
     } else {
         max_hits = std::min(std::max(fetch_size, max_hits), get_num_documents());
@@ -1477,7 +1483,6 @@ Option<nlohmann::json> Collection::search(std::string  raw_query,
     StringUtils::split(hidden_hits_str, hidden_hits, ",");
 
     std::vector<const override_t*> filter_overrides;
-    std::string query = raw_query;
     bool filter_curated_hits = false;
     std::string curated_sort_by;
     curate_results(query, filter_query, enable_overrides, pre_segmented_query, pinned_hits, hidden_hits,
@@ -1520,6 +1525,10 @@ Option<nlohmann::json> Collection::search(std::string  raw_query,
     bool is_group_by_query = group_by_fields.size() > 0;
     bool is_vector_query = !vector_query.field_name.empty();
 
+    LOG(INFO) << "is_wildcard_query: " << is_wildcard_query;
+    LOG(INFO) << "is_group_by_query: " << is_group_by_query;
+    LOG(INFO) << "is_vector_query: " << is_vector_query;
+
     if(curated_sort_by.empty()) {
         auto sort_validation_op = validate_and_standardize_sort_fields(sort_fields, 
                                     sort_fields_std, is_wildcard_query, is_vector_query, is_group_by_query);

From c4919bb358688fb2e830d27afef67f6d9a4221ce Mon Sep 17 00:00:00 2001
From: ozanarmagan <o.armagan2020@gtu.edu.tr>
Date: Thu, 10 Aug 2023 09:53:26 +0300
Subject: [PATCH 06/16] Remove logs

---
 src/collection.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/collection.cpp b/src/collection.cpp
index 1fbce5cf..fdc20482 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1525,10 +1525,6 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
     bool is_group_by_query = group_by_fields.size() > 0;
     bool is_vector_query = !vector_query.field_name.empty();
 
-    LOG(INFO) << "is_wildcard_query: " << is_wildcard_query;
-    LOG(INFO) << "is_group_by_query: " << is_group_by_query;
-    LOG(INFO) << "is_vector_query: " << is_vector_query;
-
     if(curated_sort_by.empty()) {
         auto sort_validation_op = validate_and_standardize_sort_fields(sort_fields, 
                                     sort_fields_std, is_wildcard_query, is_vector_query, is_group_by_query);

From d1692501fa846c431b3cf4abe4775d187ab5911b Mon Sep 17 00:00:00 2001
From: ozanarmagan <o.armagan2020@gtu.edu.tr>
Date: Thu, 10 Aug 2023 14:22:14 +0300
Subject: [PATCH 07/16] Fix text embedding field detection

---
 src/collection.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/collection.cpp b/src/collection.cpp
index fdc20482..8699e51c 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1043,7 +1043,7 @@ Option<bool> Collection::extract_field_name(const std::string& field_name,
     for(auto kv = prefix_it.first; kv != prefix_it.second; ++kv) {
         bool exact_key_match = (kv.key().size() == field_name.size());
         bool exact_primitive_match = exact_key_match && !kv.value().is_object();
-        bool text_embedding = kv.value().type == field_types::FLOAT_ARRAY && kv.value().embed.count(fields::from) != 0;
+        bool text_embedding = kv.value().type == field_types::FLOAT_ARRAY && kv.value().num_dim > 0;
 
         if(extract_only_string_fields && !kv.value().is_string() && !text_embedding) {
             if(exact_primitive_match && !is_wildcard) {

From 278c29b3ea0c8cdda1edd7b9b9e21f9633d11e57 Mon Sep 17 00:00:00 2001
From: ozanarmagan <o.armagan2020@gtu.edu.tr>
Date: Thu, 10 Aug 2023 14:22:25 +0300
Subject: [PATCH 08/16] Add tests

---
 test/collection_specific_more_test.cpp | 113 +++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp
index 3e5b43a7..7d44351c 100644
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@@ -2582,4 +2582,117 @@ TEST_F(CollectionSpecificMoreTest, HybridSearchTextMatchInfo) {
 
     ASSERT_EQ(0, results["hits"][0]["text_match_info"]["tokens_matched"].get<size_t>());
     ASSERT_EQ(0, results["hits"][1]["text_match_info"]["tokens_matched"].get<size_t>());
+}
+
+
+TEST_F(CollectionSpecificMoreTest, SemanticSearchReturnOnlyVectorDistance) {
+    auto schema_json =
+        R"({
+            "name": "Products",
+            "fields": [
+                {"name": "product_name", "type": "string", "infix": true},
+                {"name": "category", "type": "string"},
+                {"name": "embedding", "type":"float[]", "embed":{"from": ["product_name", "category"], "model_config": {"model_name": "ts/e5-small"}}}
+            ]
+        })"_json;
+
+    
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    auto coll1 = collection_create_op.get();
+
+    auto add_op = coll1->add(R"({
+        "product_name": "moisturizer",
+        "category": "beauty"
+    })"_json.dump());
+
+    ASSERT_TRUE(add_op.ok());
+
+    auto results = coll1->search("moisturizer", {"embedding"},
+                                 "", {}, {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 0, spp::sparse_hash_set<std::string>()).get();
+    
+    ASSERT_EQ(1, results["hits"].size());
+
+    // Return only vector distance
+    ASSERT_EQ(0, results["hits"][0].count("text_match_info"));
+    ASSERT_EQ(0, results["hits"][0].count("hybrid_search_info"));
+    ASSERT_EQ(1, results["hits"][0].count("vector_distance"));
+}
+
+TEST_F(CollectionSpecificMoreTest, KeywordSearchReturnOnlyTextMatchInfo) {
+    auto schema_json =
+            R"({
+            "name": "Products",
+            "fields": [
+                {"name": "product_name", "type": "string", "infix": true},
+                {"name": "category", "type": "string"},
+                {"name": "embedding", "type":"float[]", "embed":{"from": ["product_name", "category"], "model_config": {"model_name": "ts/e5-small"}}}
+            ]
+        })"_json;
+
+
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    auto coll1 = collection_create_op.get();
+    auto add_op = coll1->add(R"({
+        "product_name": "moisturizer",
+        "category": "beauty"
+    })"_json.dump());
+    ASSERT_TRUE(add_op.ok());
+
+    auto results = coll1->search("moisturizer", {"product_name"},
+                                 "", {}, {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 0, spp::sparse_hash_set<std::string>()).get();
+
+    
+    ASSERT_EQ(1, results["hits"].size());
+
+    // Return only text match info
+    ASSERT_EQ(0, results["hits"][0].count("vector_distance"));
+    ASSERT_EQ(0, results["hits"][0].count("hybrid_search_info"));
+    ASSERT_EQ(1, results["hits"][0].count("text_match_info"));
+}
+
+TEST_F(CollectionSpecificMoreTest, HybridSearchReturnAllInfo) {
+    auto schema_json =
+            R"({
+            "name": "Products",
+            "fields": [
+                {"name": "product_name", "type": "string", "infix": true},
+                {"name": "category", "type": "string"},
+                {"name": "embedding", "type":"float[]", "embed":{"from": ["product_name", "category"], "model_config": {"model_name": "ts/e5-small"}}}
+            ]
+        })"_json;
+    
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    auto coll1 = collection_create_op.get();
+
+    auto add_op = coll1->add(R"({
+        "product_name": "moisturizer",
+        "category": "beauty"
+    })"_json.dump());
+    ASSERT_TRUE(add_op.ok());
+
+
+    auto results = coll1->search("moisturizer", {"product_name", "embedding"},
+                                 "", {}, {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 0, spp::sparse_hash_set<std::string>()).get();
+    
+    ASSERT_EQ(1, results["hits"].size());
+
+    // Return all info
+    ASSERT_EQ(1, results["hits"][0].count("vector_distance"));
+    ASSERT_EQ(1, results["hits"][0].count("text_match_info"));
+    ASSERT_EQ(1, results["hits"][0].count("hybrid_search_info"));
 }
\ No newline at end of file

From 7096ad0c253f8d37897204393f57bcc802f49ddf Mon Sep 17 00:00:00 2001
From: ozanarmagan <o.armagan2020@gtu.edu.tr>
Date: Thu, 10 Aug 2023 14:29:27 +0300
Subject: [PATCH 09/16] Remove log

---
 src/field.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/field.cpp b/src/field.cpp
index 8c97bd2b..dc82a021 100644
--- a/src/field.cpp
+++ b/src/field.cpp
@@ -1095,8 +1095,6 @@ Option<bool> field::validate_and_init_embed_fields(const std::vector<std::pair<s
         const std::string err_msg = "Property `" + fields::embed + "." + fields::from +
                                     "` can only refer to string or string array fields.";
 
-        LOG(INFO) << "field_json: " << field_json;
-
         for(auto& field_name : field_json[fields::embed][fields::from].get<std::vector<std::string>>()) {
             auto embed_field = std::find_if(fields_json.begin(), fields_json.end(), [&field_name](const nlohmann::json& x) {
                 return x["name"].get<std::string>() == field_name;

From 093442857a4c38a0e2129b5309b12493c9035020 Mon Sep 17 00:00:00 2001
From: ozanarmagan <o.armagan2020@gtu.edu.tr>
Date: Thu, 10 Aug 2023 14:31:58 +0300
Subject: [PATCH 10/16] Move tests

---
 test/collection_specific_more_test.cpp | 111 ------------------------
 test/collection_vector_search_test.cpp | 114 ++++++++++++++++++++++++-
 2 files changed, 113 insertions(+), 112 deletions(-)

diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp
index 7d44351c..fbc78e22 100644
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@@ -2585,114 +2585,3 @@ TEST_F(CollectionSpecificMoreTest, HybridSearchTextMatchInfo) {
 }
 
 
-TEST_F(CollectionSpecificMoreTest, SemanticSearchReturnOnlyVectorDistance) {
-    auto schema_json =
-        R"({
-            "name": "Products",
-            "fields": [
-                {"name": "product_name", "type": "string", "infix": true},
-                {"name": "category", "type": "string"},
-                {"name": "embedding", "type":"float[]", "embed":{"from": ["product_name", "category"], "model_config": {"model_name": "ts/e5-small"}}}
-            ]
-        })"_json;
-
-    
-    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
-
-    auto collection_create_op = collectionManager.create_collection(schema_json);
-    ASSERT_TRUE(collection_create_op.ok());
-    auto coll1 = collection_create_op.get();
-
-    auto add_op = coll1->add(R"({
-        "product_name": "moisturizer",
-        "category": "beauty"
-    })"_json.dump());
-
-    ASSERT_TRUE(add_op.ok());
-
-    auto results = coll1->search("moisturizer", {"embedding"},
-                                 "", {}, {}, {2}, 10,
-                                 1, FREQUENCY, {true},
-                                 0, spp::sparse_hash_set<std::string>()).get();
-    
-    ASSERT_EQ(1, results["hits"].size());
-
-    // Return only vector distance
-    ASSERT_EQ(0, results["hits"][0].count("text_match_info"));
-    ASSERT_EQ(0, results["hits"][0].count("hybrid_search_info"));
-    ASSERT_EQ(1, results["hits"][0].count("vector_distance"));
-}
-
-TEST_F(CollectionSpecificMoreTest, KeywordSearchReturnOnlyTextMatchInfo) {
-    auto schema_json =
-            R"({
-            "name": "Products",
-            "fields": [
-                {"name": "product_name", "type": "string", "infix": true},
-                {"name": "category", "type": "string"},
-                {"name": "embedding", "type":"float[]", "embed":{"from": ["product_name", "category"], "model_config": {"model_name": "ts/e5-small"}}}
-            ]
-        })"_json;
-
-
-    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
-
-    auto collection_create_op = collectionManager.create_collection(schema_json);
-    ASSERT_TRUE(collection_create_op.ok());
-    auto coll1 = collection_create_op.get();
-    auto add_op = coll1->add(R"({
-        "product_name": "moisturizer",
-        "category": "beauty"
-    })"_json.dump());
-    ASSERT_TRUE(add_op.ok());
-
-    auto results = coll1->search("moisturizer", {"product_name"},
-                                 "", {}, {}, {2}, 10,
-                                 1, FREQUENCY, {true},
-                                 0, spp::sparse_hash_set<std::string>()).get();
-
-    
-    ASSERT_EQ(1, results["hits"].size());
-
-    // Return only text match info
-    ASSERT_EQ(0, results["hits"][0].count("vector_distance"));
-    ASSERT_EQ(0, results["hits"][0].count("hybrid_search_info"));
-    ASSERT_EQ(1, results["hits"][0].count("text_match_info"));
-}
-
-TEST_F(CollectionSpecificMoreTest, HybridSearchReturnAllInfo) {
-    auto schema_json =
-            R"({
-            "name": "Products",
-            "fields": [
-                {"name": "product_name", "type": "string", "infix": true},
-                {"name": "category", "type": "string"},
-                {"name": "embedding", "type":"float[]", "embed":{"from": ["product_name", "category"], "model_config": {"model_name": "ts/e5-small"}}}
-            ]
-        })"_json;
-    
-    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
-
-    auto collection_create_op = collectionManager.create_collection(schema_json);
-    ASSERT_TRUE(collection_create_op.ok());
-    auto coll1 = collection_create_op.get();
-
-    auto add_op = coll1->add(R"({
-        "product_name": "moisturizer",
-        "category": "beauty"
-    })"_json.dump());
-    ASSERT_TRUE(add_op.ok());
-
-
-    auto results = coll1->search("moisturizer", {"product_name", "embedding"},
-                                 "", {}, {}, {2}, 10,
-                                 1, FREQUENCY, {true},
-                                 0, spp::sparse_hash_set<std::string>()).get();
-    
-    ASSERT_EQ(1, results["hits"].size());
-
-    // Return all info
-    ASSERT_EQ(1, results["hits"][0].count("vector_distance"));
-    ASSERT_EQ(1, results["hits"][0].count("text_match_info"));
-    ASSERT_EQ(1, results["hits"][0].count("hybrid_search_info"));
-}
\ No newline at end of file
diff --git a/test/collection_vector_search_test.cpp b/test/collection_vector_search_test.cpp
index bfb20be5..390b9fa7 100644
--- a/test/collection_vector_search_test.cpp
+++ b/test/collection_vector_search_test.cpp
@@ -775,7 +775,7 @@ TEST_F(CollectionVectorTest, HybridSearchWithExplicitVector) {
     ASSERT_EQ(2, search_res["found"].get<size_t>());
     ASSERT_EQ(2, search_res["hits"].size());
 
-    ASSERT_FLOAT_EQ(0.04620, search_res["hits"][0]["vector_distance"].get<float>());
+    ASSERT_FLOAT_EQ(0.046207964, search_res["hits"][0]["vector_distance"].get<float>());
     ASSERT_FLOAT_EQ(0.1213316321, search_res["hits"][1]["vector_distance"].get<float>());
 
     // to pass k param
@@ -1031,4 +1031,116 @@ TEST_F(CollectionVectorTest, EmbedFromOptionalNullField) {
     add_op = coll->add(doc.dump());
 
     ASSERT_TRUE(add_op.ok());
+}
+
+TEST_F(CollectionVectorTest, SemanticSearchReturnOnlyVectorDistance) {
+    auto schema_json =
+        R"({
+            "name": "Products",
+            "fields": [
+                {"name": "product_name", "type": "string", "infix": true},
+                {"name": "category", "type": "string"},
+                {"name": "embedding", "type":"float[]", "embed":{"from": ["product_name", "category"], "model_config": {"model_name": "ts/e5-small"}}}
+            ]
+        })"_json;
+
+    
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    auto coll1 = collection_create_op.get();
+
+    auto add_op = coll1->add(R"({
+        "product_name": "moisturizer",
+        "category": "beauty"
+    })"_json.dump());
+
+    ASSERT_TRUE(add_op.ok());
+
+    auto results = coll1->search("moisturizer", {"embedding"},
+                                 "", {}, {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 0, spp::sparse_hash_set<std::string>()).get();
+    
+    ASSERT_EQ(1, results["hits"].size());
+
+    // Return only vector distance
+    ASSERT_EQ(0, results["hits"][0].count("text_match_info"));
+    ASSERT_EQ(0, results["hits"][0].count("hybrid_search_info"));
+    ASSERT_EQ(1, results["hits"][0].count("vector_distance"));
+}
+
+TEST_F(CollectionVectorTest, KeywordSearchReturnOnlyTextMatchInfo) {
+    auto schema_json =
+            R"({
+            "name": "Products",
+            "fields": [
+                {"name": "product_name", "type": "string", "infix": true},
+                {"name": "category", "type": "string"},
+                {"name": "embedding", "type":"float[]", "embed":{"from": ["product_name", "category"], "model_config": {"model_name": "ts/e5-small"}}}
+            ]
+        })"_json;
+
+
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    auto coll1 = collection_create_op.get();
+    auto add_op = coll1->add(R"({
+        "product_name": "moisturizer",
+        "category": "beauty"
+    })"_json.dump());
+    ASSERT_TRUE(add_op.ok());
+
+    auto results = coll1->search("moisturizer", {"product_name"},
+                                 "", {}, {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 0, spp::sparse_hash_set<std::string>()).get();
+
+    
+    ASSERT_EQ(1, results["hits"].size());
+
+    // Return only text match info
+    ASSERT_EQ(0, results["hits"][0].count("vector_distance"));
+    ASSERT_EQ(0, results["hits"][0].count("hybrid_search_info"));
+    ASSERT_EQ(1, results["hits"][0].count("text_match_info"));
+}
+
+TEST_F(CollectionVectorTest, HybridSearchReturnAllInfo) {
+    auto schema_json =
+            R"({
+            "name": "Products",
+            "fields": [
+                {"name": "product_name", "type": "string", "infix": true},
+                {"name": "category", "type": "string"},
+                {"name": "embedding", "type":"float[]", "embed":{"from": ["product_name", "category"], "model_config": {"model_name": "ts/e5-small"}}}
+            ]
+        })"_json;
+    
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    auto coll1 = collection_create_op.get();
+
+    auto add_op = coll1->add(R"({
+        "product_name": "moisturizer",
+        "category": "beauty"
+    })"_json.dump());
+    ASSERT_TRUE(add_op.ok());
+
+
+    auto results = coll1->search("moisturizer", {"product_name", "embedding"},
+                                 "", {}, {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 0, spp::sparse_hash_set<std::string>()).get();
+    
+    ASSERT_EQ(1, results["hits"].size());
+
+    // Return all info
+    ASSERT_EQ(1, results["hits"][0].count("vector_distance"));
+    ASSERT_EQ(1, results["hits"][0].count("text_match_info"));
+    ASSERT_EQ(1, results["hits"][0].count("hybrid_search_info"));
 }
\ No newline at end of file

From 722cd3446d07072d54c2820f5065f92301413689 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Thu, 10 Aug 2023 18:38:12 +0530
Subject: [PATCH 11/16] Parse vector float values inside the try block.

---
 src/index.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/index.cpp b/src/index.cpp
index bc1845de..4a843349 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -867,9 +867,8 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
                                 continue;
                             }
 
-                            const std::vector<float>& float_vals = record.doc[afield.name].get<std::vector<float>>();
-
                             try {
+                                const std::vector<float>& float_vals = record.doc[afield.name].get<std::vector<float>>();
                                 if(afield.vec_dist == cosine) {
                                     std::vector<float> normalized_vals(afield.num_dim);
                                     hnsw_index_t::normalize_vector(float_vals, normalized_vals);

From f33163ff164bdaf7c1218e4b48f394f592e3781a Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Thu, 10 Aug 2023 20:30:40 +0530
Subject: [PATCH 12/16] Fix regression in partial update of record with
 embedding.

---
 src/validator.cpp                      |  2 +-
 test/collection_vector_search_test.cpp | 36 ++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/src/validator.cpp b/src/validator.cpp
index f814c923..f8c23ee9 100644
--- a/src/validator.cpp
+++ b/src/validator.cpp
@@ -716,7 +716,7 @@ Option<bool> validator_t::validate_embed_fields(const nlohmann::json& document,
                 }
             }
         }
-        if(all_optional_and_null && !field.optional) {
+        if(all_optional_and_null && !field.optional && !is_update) {
             return Option<bool>(400, "No valid fields found to create embedding for `" + field.name + "`, please provide at least one valid field or make the embedding field optional.");
         }
     }
diff --git a/test/collection_vector_search_test.cpp b/test/collection_vector_search_test.cpp
index 2f55cd34..b55bb085 100644
--- a/test/collection_vector_search_test.cpp
+++ b/test/collection_vector_search_test.cpp
@@ -1033,6 +1033,42 @@ TEST_F(CollectionVectorTest, EmbedFromOptionalNullField) {
     ASSERT_TRUE(add_op.ok());
 }
 
+TEST_F(CollectionVectorTest, UpdateOfCollWithNonOptionalEmbeddingField) {
+    nlohmann::json schema = R"({
+        "name": "objects",
+        "fields": [
+            {"name": "name", "type": "string"},
+            {"name": "about", "type": "string"},
+            {"name": "embedding", "type":"float[]", "embed":{"from": ["name"], "model_config": {"model_name": "ts/e5-small"}}}
+        ]
+    })"_json;
+
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection* coll = op.get();
+
+    nlohmann::json object;
+    object["id"] = "0";
+    object["name"] = "butter";
+    object["about"] = "about butter";
+
+    auto add_op = coll->add(object.dump(), CREATE);
+    ASSERT_TRUE(add_op.ok());
+
+    nlohmann::json update_object;
+    update_object["id"] = "0";
+    update_object["about"] = "something about butter";
+    auto update_op = coll->add(update_object.dump(), EMPLACE);
+    ASSERT_TRUE(update_op.ok());
+
+    // action = update
+    update_object["about"] = "something about butter 2";
+    update_op = coll->add(update_object.dump(), UPDATE);
+    ASSERT_TRUE(update_op.ok());
+}
+
 TEST_F(CollectionVectorTest, SkipEmbeddingOpWhenValueExists) {
     nlohmann::json schema = R"({
         "name": "objects",

From c7e5285618074a8b3ba418382ede1b7e2cdeeef7 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Thu, 10 Aug 2023 20:50:57 +0530
Subject: [PATCH 13/16] Add additional test.

---
 test/collection_vector_search_test.cpp | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/test/collection_vector_search_test.cpp b/test/collection_vector_search_test.cpp
index b55bb085..c15161ae 100644
--- a/test/collection_vector_search_test.cpp
+++ b/test/collection_vector_search_test.cpp
@@ -1069,6 +1069,32 @@ TEST_F(CollectionVectorTest, UpdateOfCollWithNonOptionalEmbeddingField) {
     ASSERT_TRUE(update_op.ok());
 }
 
+TEST_F(CollectionVectorTest, FreshEmplaceWithOptionalEmbeddingReferencedField) {
+    auto schema = R"({
+        "name": "objects",
+        "fields": [
+            {"name": "name", "type": "string", "optional": true},
+            {"name": "about", "type": "string"},
+            {"name": "embedding", "type":"float[]", "embed":{"from": ["name"], "model_config": {"model_name": "ts/e5-small"}}}
+        ]
+    })"_json;
+
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection* coll = op.get();
+
+    nlohmann::json object;
+    object["id"] = "0";
+    object["about"] = "about butter";
+
+    auto add_op = coll->add(object.dump(), EMPLACE);
+    ASSERT_FALSE(add_op.ok());
+    ASSERT_EQ("No valid fields found to create embedding for `embedding`, please provide at least one valid field "
+              "or make the embedding field optional.", add_op.error());
+}
+
 TEST_F(CollectionVectorTest, SkipEmbeddingOpWhenValueExists) {
     nlohmann::json schema = R"({
         "name": "objects",

From bbf67e1979f2045a3f147eabb224021c0fea44a4 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Fri, 11 Aug 2023 09:25:57 +0530
Subject: [PATCH 14/16] Fix counter increment for query aggregation.

---
 src/index.cpp                          |  4 +-
 test/collection_specific_more_test.cpp | 70 ++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/src/index.cpp b/src/index.cpp
index 4a843349..4d81126a 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -434,6 +434,8 @@ void Index::validate_and_preprocess(Index *index, std::vector<index_record>& ite
                 continue;
             }
 
+            handle_doc_ops(search_schema, index_rec.doc, index_rec.old_doc);
+
             if(do_validation) {
                 Option<uint32_t> validation_op = validator_t::validate_index_in_memory(index_rec.doc, index_rec.seq_id,
                                                                           default_sorting_field,
@@ -471,7 +473,6 @@ void Index::validate_and_preprocess(Index *index, std::vector<index_record>& ite
                     }
                 }
             } else {
-                handle_doc_ops(search_schema, index_rec.doc, index_rec.old_doc);
                 if(generate_embeddings) {
                     records_to_embed.push_back(&index_rec);
                 }
@@ -6260,7 +6261,6 @@ void Index::get_doc_changes(const index_operation_t op, const tsl::htrie_map<cha
             }
         }
     } else {
-        handle_doc_ops(search_schema, update_doc, old_doc);
         new_doc = old_doc;
         new_doc.merge_patch(update_doc);
 
diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp
index fbc78e22..83181283 100644
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@@ -2123,6 +2123,76 @@ TEST_F(CollectionSpecificMoreTest, WeightTakingPrecendeceOverMatch) {
     ASSERT_EQ(2, res["hits"][1]["text_match_info"]["tokens_matched"].get<size_t>());
 }
 
+TEST_F(CollectionSpecificMoreTest, IncrementingCount) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "fields": [
+            {"name": "title", "type": "string"},
+            {"name": "count", "type": "int32"}
+        ]
+    })"_json;
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    // brand new document: create + upsert + emplace should work
+
+    nlohmann::json doc;
+    doc["id"] = "0";
+    doc["title"] = "Foo";
+    doc["$operations"]["increment"]["count"] = 1;
+    ASSERT_TRUE(coll1->add(doc.dump(), CREATE).ok());
+
+    doc.clear();
+    doc["id"] = "1";
+    doc["title"] = "Bar";
+    doc["$operations"]["increment"]["count"] = 1;
+    ASSERT_TRUE(coll1->add(doc.dump(), EMPLACE).ok());
+
+    doc.clear();
+    doc["id"] = "2";
+    doc["title"] = "Taz";
+    doc["$operations"]["increment"]["count"] = 1;
+    ASSERT_TRUE(coll1->add(doc.dump(), UPSERT).ok());
+
+    auto res = coll1->search("*", {}, "", {}, {}, {2}, 10, 1, FREQUENCY, {true}, 5,
+                             spp::sparse_hash_set<std::string>(),
+                             spp::sparse_hash_set<std::string>(), 10).get();
+
+    ASSERT_EQ(3, res["hits"].size());
+    ASSERT_EQ(1, res["hits"][0]["document"]["count"].get<size_t>());
+    ASSERT_EQ(1, res["hits"][1]["document"]["count"].get<size_t>());
+    ASSERT_EQ(1, res["hits"][2]["document"]["count"].get<size_t>());
+
+    // should support updates
+
+    doc.clear();
+    doc["id"] = "0";
+    doc["title"] = "Foo";
+    doc["$operations"]["increment"]["count"] = 3;
+    ASSERT_TRUE(coll1->add(doc.dump(), UPSERT).ok());
+
+    doc.clear();
+    doc["id"] = "1";
+    doc["title"] = "Bar";
+    doc["$operations"]["increment"]["count"] = 3;
+    ASSERT_TRUE(coll1->add(doc.dump(), EMPLACE).ok());
+
+    doc.clear();
+    doc["id"] = "2";
+    doc["title"] = "Bar";
+    doc["$operations"]["increment"]["count"] = 3;
+    ASSERT_TRUE(coll1->add(doc.dump(), UPDATE).ok());
+
+    res = coll1->search("*", {}, "", {}, {}, {2}, 10, 1, FREQUENCY, {true}, 5,
+                        spp::sparse_hash_set<std::string>(),
+                        spp::sparse_hash_set<std::string>(), 10).get();
+
+    ASSERT_EQ(3, res["hits"].size());
+    ASSERT_EQ(4, res["hits"][0]["document"]["count"].get<size_t>());
+    ASSERT_EQ(4, res["hits"][1]["document"]["count"].get<size_t>());
+    ASSERT_EQ(4, res["hits"][2]["document"]["count"].get<size_t>());
+}
+
 TEST_F(CollectionSpecificMoreTest, HighlightOnFieldNameWithDot) {
     nlohmann::json schema = R"({
         "name": "coll1",

From dafde32ce0a1a0f8813c78670ab14da4346b5057 Mon Sep 17 00:00:00 2001
From: ozanarmagan <o.armagan2020@gtu.edu.tr>
Date: Mon, 14 Aug 2023 23:25:18 +0300
Subject: [PATCH 15/16] Fix KV constructor parameters

---
 include/topster.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/topster.h b/include/topster.h
index c7378f2b..a16b4440 100644
--- a/include/topster.h
+++ b/include/topster.h
@@ -24,7 +24,7 @@ struct KV {
     // to be used only in final aggregation
     uint64_t* query_indices = nullptr;
 
-    KV(uint16_t queryIndex, uint64_t key, uint64_t distinct_key, uint8_t match_score_index, const int64_t *scores,
+    KV(uint16_t queryIndex, uint64_t key, uint64_t distinct_key, int8_t match_score_index, const int64_t *scores,
        reference_filter_result_t* reference_filter_result = nullptr):
             match_score_index(match_score_index), query_index(queryIndex), array_index(0), key(key),
             distinct_key(distinct_key), reference_filter_result(reference_filter_result) {

From e1e890279c0b9ffff93b8e0cd49f9796484ed247 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Tue, 15 Aug 2023 09:54:36 +0530
Subject: [PATCH 16/16] Refactor credential hiding logic.

---
 src/collection.cpp                     |  9 ++--
 test/collection_vector_search_test.cpp | 66 ++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/src/collection.cpp b/src/collection.cpp
index 04625c8c..8883f4ab 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -261,10 +261,6 @@ nlohmann::json Collection::get_summary_json() const {
             field_json[fields::reference] = coll_field.reference;
         }
 
-        if(!coll_field.embed.empty()) {
-            field_json[fields::embed] = coll_field.embed;
-        }
-
         fields_arr.push_back(field_json);
     }
 
@@ -4936,9 +4932,10 @@ void Collection::hide_credential(nlohmann::json& json, const std::string& creden
         // hide api key with * except first 5 chars
         std::string credential_name_str = json[credential_name];
         if(credential_name_str.size() > 5) {
-            json[credential_name] = credential_name_str.replace(5, credential_name_str.size() - 5, credential_name_str.size() - 5, '*');
+            size_t num_chars_to_replace = credential_name_str.size() - 5;
+            json[credential_name] = credential_name_str.replace(5, num_chars_to_replace, num_chars_to_replace, '*');
         } else {
-            json[credential_name] = credential_name_str.replace(0, credential_name_str.size(), credential_name_str.size(), '*');
+            json[credential_name] = "***********";
         }
     }
 }
diff --git a/test/collection_vector_search_test.cpp b/test/collection_vector_search_test.cpp
index c15161ae..059e4437 100644
--- a/test/collection_vector_search_test.cpp
+++ b/test/collection_vector_search_test.cpp
@@ -1033,6 +1033,72 @@ TEST_F(CollectionVectorTest, EmbedFromOptionalNullField) {
     ASSERT_TRUE(add_op.ok());
 }
 
+TEST_F(CollectionVectorTest, HideCredential) {  // summary JSON must mask model_config credentials
+    auto schema_json =
+            R"({
+            "name": "Products",
+            "fields": [
+                {"name": "product_name", "type": "string", "infix": true},
+                {"name": "embedding", "type":"float[]", "embed":{"from": ["product_name"],
+                    "model_config": {
+                        "model_name": "ts/e5-small",
+                        "api_key": "ax-abcdef12345",
+                        "access_token": "ax-abcdef12345",
+                        "refresh_token": "ax-abcdef12345",
+                        "client_id": "ax-abcdef12345",
+                        "client_secret": "ax-abcdef12345",
+                        "project_id": "ax-abcdef12345"
+                    }}}
+            ]
+        })"_json;
+
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    auto coll1 = collection_create_op.get();
+    auto coll_summary = coll1->get_summary_json();
+    // Values longer than 5 chars: the summary keeps the first 5 chars and replaces the rest with '*'.
+    ASSERT_EQ("ax-ab*********", coll_summary["fields"][1]["embed"]["model_config"]["api_key"].get<std::string>());
+    ASSERT_EQ("ax-ab*********", coll_summary["fields"][1]["embed"]["model_config"]["access_token"].get<std::string>());
+    ASSERT_EQ("ax-ab*********", coll_summary["fields"][1]["embed"]["model_config"]["refresh_token"].get<std::string>());
+    ASSERT_EQ("ax-ab*********", coll_summary["fields"][1]["embed"]["model_config"]["client_id"].get<std::string>());
+    ASSERT_EQ("ax-ab*********", coll_summary["fields"][1]["embed"]["model_config"]["client_secret"].get<std::string>());
+    ASSERT_EQ("ax-ab*********", coll_summary["fields"][1]["embed"]["model_config"]["project_id"].get<std::string>());
+
+    // Short credentials (5 chars or fewer, here "ax1") are expected to come back as a fixed mask.
+
+    schema_json =
+            R"({
+            "name": "Products2",
+            "fields": [
+                {"name": "product_name", "type": "string", "infix": true},
+                {"name": "embedding", "type":"float[]", "embed":{"from": ["product_name"],
+                    "model_config": {
+                        "model_name": "ts/e5-small",
+                        "api_key": "ax1",
+                        "access_token": "ax1",
+                        "refresh_token": "ax1",
+                        "client_id": "ax1",
+                        "client_secret": "ax1",
+                        "project_id": "ax1"
+                    }}}
+            ]
+        })"_json;
+
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    auto coll2 = collection_create_op.get();
+    coll_summary = coll2->get_summary_json();
+    // The mask is the same literal for every key, so it does not reflect the 3-char length of the input.
+    ASSERT_EQ("***********", coll_summary["fields"][1]["embed"]["model_config"]["api_key"].get<std::string>());
+    ASSERT_EQ("***********", coll_summary["fields"][1]["embed"]["model_config"]["access_token"].get<std::string>());
+    ASSERT_EQ("***********", coll_summary["fields"][1]["embed"]["model_config"]["refresh_token"].get<std::string>());
+    ASSERT_EQ("***********", coll_summary["fields"][1]["embed"]["model_config"]["client_id"].get<std::string>());
+    ASSERT_EQ("***********", coll_summary["fields"][1]["embed"]["model_config"]["client_secret"].get<std::string>());
+    ASSERT_EQ("***********", coll_summary["fields"][1]["embed"]["model_config"]["project_id"].get<std::string>());
+}
+
 TEST_F(CollectionVectorTest, UpdateOfCollWithNonOptionalEmbeddingField) {
     nlohmann::json schema = R"({
         "name": "objects",