Merge branch 'v0.25-join'

This commit is contained in:
Jason Bosco 2023-09-11 10:34:38 -05:00
commit 3bbfe20fcc
24 changed files with 908 additions and 143 deletions

View File

@ -79,7 +79,7 @@ public:
Option<bool> remove_rule(const std::string& name);
void add_suggestion(const std::string& query_collection,
std::string& query, bool live_query, const std::string& user_id);
const std::string& query, bool live_query, const std::string& user_id);
void stop();

View File

@ -162,7 +162,7 @@ private:
void remove_document(const nlohmann::json & document, const uint32_t seq_id, bool remove_from_store);
void process_remove_field_for_embedding_fields(const field& the_field, std::vector<field>& garbage_fields);
void process_remove_field_for_embedding_fields(const field& del_field, std::vector<field>& garbage_embed_fields);
void curate_results(string& actual_query, const string& filter_query, bool enable_overrides, bool already_segmented,
const std::map<size_t, std::vector<std::string>>& pinned_hits,

View File

@ -424,10 +424,11 @@ struct field {
std::string& fallback_field_type,
std::vector<field>& the_fields);
static Option<bool> validate_and_init_embed_fields(const std::vector<std::pair<size_t, size_t>>& embed_json_field_indices,
const tsl::htrie_map<char, field>& search_schema,
nlohmann::json& fields_json,
std::vector<field>& fields_vec);
static Option<bool> validate_and_init_embed_field(const tsl::htrie_map<char, field>& search_schema,
nlohmann::json& field_json,
const nlohmann::json& fields_json,
field& the_field);
static bool flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_array, bool has_obj_array,
bool is_update, const field& the_field, const std::string& flat_name,

View File

@ -532,7 +532,7 @@ private:
static void handle_doc_ops(const tsl::htrie_map<char, field>& search_schema,
nlohmann::json& update_doc, const nlohmann::json& old_doc);
static void get_doc_changes(const index_operation_t op, const tsl::htrie_map<char, field>& search_schema,
static void get_doc_changes(const index_operation_t op, const tsl::htrie_map<char, field>& embedding_fields,
nlohmann::json &update_doc, const nlohmann::json &old_doc, nlohmann::json &new_doc,
nlohmann::json &del_doc);

View File

@ -72,6 +72,10 @@ public:
Option<bool> validate_and_init_local_model(const nlohmann::json& model_config, size_t& num_dims);
Option<bool> validate_and_init_model(const nlohmann::json& model_config, size_t& num_dims);
std::unordered_map<std::string, std::shared_ptr<TextEmbedder>> _get_text_embedders() {
return text_embedders;
}
private:
TextEmbedderManager() = default;

View File

@ -31,6 +31,7 @@ class RemoteEmbedder {
virtual nlohmann::json get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) = 0;
virtual embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_tries = 2) = 0;
virtual std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200) = 0;
static const std::string get_model_key(const nlohmann::json& model_config);
static void init(ReplicationState* rs) {
raft_server = rs;
}
@ -51,6 +52,7 @@ class OpenAIEmbedder : public RemoteEmbedder {
embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_tries = 2) override;
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200) override;
nlohmann::json get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) override;
static std::string get_model_key(const nlohmann::json& model_config);
};
@ -68,6 +70,7 @@ class GoogleEmbedder : public RemoteEmbedder {
embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_tries = 2) override;
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200) override;
nlohmann::json get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) override;
static std::string get_model_key(const nlohmann::json& model_config);
};
@ -95,6 +98,7 @@ class GCPEmbedder : public RemoteEmbedder {
embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_tries = 2) override;
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200) override;
nlohmann::json get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) override;
static std::string get_model_key(const nlohmann::json& model_config);
};

View File

@ -88,5 +88,5 @@ public:
bool should_skip_char(char c);
static void normalize_ascii(std::string& text);
static std::string normalize_ascii_no_spaces(const std::string& text);
};

View File

@ -787,6 +787,10 @@ public:
cors_domains.insert(cors_values_vec.begin(), cors_values_vec.end());
}
void set_enable_search_analytics(bool enable_search_analytics) {
this->enable_search_analytics = enable_search_analytics;
}
// validation
Option<bool> is_valid() {

View File

@ -203,7 +203,7 @@ Option<bool> AnalyticsManager::remove_popular_queries_index(const std::string &n
return Option<bool>(true);
}
void AnalyticsManager::add_suggestion(const std::string &query_collection, std::string &query,
void AnalyticsManager::add_suggestion(const std::string &query_collection, const std::string& query,
const bool live_query, const std::string& user_id) {
// look up suggestion collections for the query collection
std::unique_lock lock(mutex);
@ -212,7 +212,6 @@ void AnalyticsManager::add_suggestion(const std::string &query_collection, std::
for(const auto& suggestion_collection: suggestion_collections_it->second) {
const auto& popular_queries_it = popular_queries.find(suggestion_collection);
if(popular_queries_it != popular_queries.end()) {
Tokenizer::normalize_ascii(query);
popular_queries_it->second->add(query, live_query, user_id);
}
}
@ -235,6 +234,8 @@ void AnalyticsManager::run(ReplicationState* raft_server) {
}
persist_suggestions(raft_server, prev_persistence_s);
prev_persistence_s = std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
lk.unlock();
}
@ -270,8 +271,6 @@ void AnalyticsManager::persist_suggestions(ReplicationState *raft_server, uint64
continue;
}
prev_persistence_s = now_ts_seconds;
std::string import_payload;
popularQueries->serialize_as_docs(import_payload);

View File

@ -52,12 +52,6 @@ Collection::Collection(const std::string& name, const uint32_t collection_id, co
symbols_to_index(to_char_array(symbols_to_index)), token_separators(to_char_array(token_separators)),
index(init_index()) {
for (auto const& field: fields) {
if (field.embed.count(fields::from) != 0) {
embedding_fields.emplace(field.name, field);
}
}
this->num_documents = 0;
}
@ -3942,7 +3936,6 @@ Option<bool> Collection::alter(nlohmann::json& alter_payload) {
}
}
// hide credentials in the alter payload return
for(auto& field_json : alter_payload["fields"]) {
if(field_json[fields::embed].count(fields::model_config) != 0) {
@ -3955,8 +3948,6 @@ Option<bool> Collection::alter(nlohmann::json& alter_payload) {
}
}
return Option<bool>(true);
}
@ -4165,7 +4156,6 @@ Option<bool> Collection::validate_alter_payload(nlohmann::json& schema_changes,
}
std::unordered_map<std::string, field> new_dynamic_fields;
std::vector<std::pair<size_t, size_t>> embed_json_field_indices;
int json_array_index = -1;
for(const auto& kv: schema_changes["fields"].items()) {
@ -4253,7 +4243,7 @@ Option<bool> Collection::validate_alter_payload(nlohmann::json& schema_changes,
return parse_op;
}
const auto& f = diff_fields.back();
auto& f = diff_fields.back();
if(f.is_dynamic()) {
new_dynamic_fields[f.name] = f;
@ -4261,6 +4251,14 @@ Option<bool> Collection::validate_alter_payload(nlohmann::json& schema_changes,
updated_search_schema[f.name] = f;
}
if(!f.embed.empty()) {
auto validate_res = field::validate_and_init_embed_field(search_schema, schema_changes["fields"][json_array_index], schema_changes["fields"], f);
if(!validate_res.ok()) {
return validate_res;
}
}
if(is_reindex) {
reindex_fields.push_back(f);
} else {
@ -4295,9 +4293,7 @@ Option<bool> Collection::validate_alter_payload(nlohmann::json& schema_changes,
}
}
if(!f.embed.empty() && !diff_fields.empty()) {
embed_json_field_indices.emplace_back(json_array_index, diff_fields.size()-1);
}
} else {
// partial update is not supported for now
@ -4307,12 +4303,6 @@ Option<bool> Collection::validate_alter_payload(nlohmann::json& schema_changes,
}
}
auto validation_op = field::validate_and_init_embed_fields(embed_json_field_indices, search_schema,
schema_changes["fields"], diff_fields);
if(!validation_op.ok()) {
return validation_op;
}
if(num_auto_detect_fields > 1) {
return Option<bool>(400, "There can be only one field named `.*`.");
}
@ -4904,27 +4894,43 @@ Option<bool> Collection::populate_include_exclude_fields_lk(const spp::sparse_ha
}
// Removes the dropped field from embed_from of all embedding fields.
void Collection::process_remove_field_for_embedding_fields(const field& the_field, std::vector<field>& garbage_fields) {
void Collection::process_remove_field_for_embedding_fields(const field& del_field,
std::vector<field>& garbage_embed_fields) {
for(auto& field : fields) {
if(field.embed.count(fields::from) == 0) {
continue;
}
auto embed_from = field.embed[fields::from].get<std::vector<std::string>>();
embed_from.erase(std::remove_if(embed_from.begin(), embed_from.end(), [&the_field](std::string field_name) {
return the_field.name == field_name;
}));
field.embed[fields::from] = std::move(embed_from);
embedding_fields[field.name] = field;
// mark this embedding field as "garbage" if it has no more embed_from fields
if(embed_from.empty()) {
embedding_fields.erase(field.name);
garbage_fields.push_back(field);
bool found_field = false;
nlohmann::json& embed_from_names = field.embed[fields::from];
for(auto it = embed_from_names.begin(); it != embed_from_names.end();) {
if(it.value() == del_field.name) {
it = embed_from_names.erase(it);
found_field = true;
} else {
it++;
}
}
if(found_field) {
// mark this embedding field as "garbage" if it has no more embed_from fields
if(embed_from_names.empty()) {
garbage_embed_fields.push_back(field);
} else {
// the dropped field was present in `embed_from`, so we have to update the field objects
field.embed[fields::from] = embed_from_names;
embedding_fields[field.name].embed[fields::from] = embed_from_names;
}
}
}
for(auto& garbage_field: garbage_embed_fields) {
embedding_fields.erase(garbage_field.name);
search_schema.erase(garbage_field.name);
fields.erase(std::remove_if(fields.begin(), fields.end(), [&garbage_field](const auto &f) {
return f.name == garbage_field.name;
}), fields.end());
}
}
void Collection::hide_credential(nlohmann::json& json, const std::string& credential_name) {
@ -4939,10 +4945,15 @@ void Collection::hide_credential(nlohmann::json& json, const std::string& creden
}
}
}
Option<bool> Collection::truncate_after_top_k(const string &field_name, size_t k) {
std::shared_lock slock(mutex);
std::vector<uint32_t> seq_ids;
auto op = index->seq_ids_outside_top_k(field_name, k, seq_ids);
slock.unlock();
if(!op.ok()) {
return op;
}

View File

@ -766,7 +766,9 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
nlohmann::json preset;
const auto& preset_op = CollectionManager::get_instance().get_preset(preset_it->second, preset);
if(preset_op.ok()) {
// NOTE: we merge only single preset configuration because multi ("searches") preset value replaces
// the request body directly before we reach this single search request function.
if(preset_op.ok() && !preset.contains("searches")) {
if(!preset.is_object()) {
return Option<bool>(400, "Search preset is not an object.");
}
@ -1112,7 +1114,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
if(Config::get_instance().get_enable_search_analytics()) {
if(result.count("found") != 0 && result["found"].get<size_t>() != 0) {
std::string analytics_query = raw_query;
std::string analytics_query = Tokenizer::normalize_ascii_no_spaces(raw_query);
AnalyticsManager::get_instance().add_suggestion(orig_coll_name, analytics_query,
true, req_params["x-typesense-user-id"]);
}

View File

@ -729,7 +729,7 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::share
}
}
res->content_type_header = "text/plain; charset=utf8";
res->content_type_header = "text/plain; charset=utf-8";
res->status_code = 200;
stream_response(req, res);
@ -902,7 +902,7 @@ bool post_import_documents(const std::shared_ptr<http_req>& req, const std::shar
}
}
res->content_type_header = "text/plain; charset=utf8";
res->content_type_header = "text/plain; charset=utf-8";
res->status_code = 200;
res->body = response_stream.str();

View File

@ -1083,7 +1083,7 @@ void field::compact_nested_fields(tsl::htrie_map<char, field>& nested_fields) {
Option<bool> field::json_fields_to_fields(bool enable_nested_fields, nlohmann::json &fields_json, string &fallback_field_type,
std::vector<field>& the_fields) {
size_t num_auto_detect_fields = 0;
std::vector<std::pair<size_t, size_t>> embed_json_field_indices;
const tsl::htrie_map<char, field> dummy_search_schema;
for(size_t i = 0; i < fields_json.size(); i++) {
nlohmann::json& field_json = fields_json[i];
@ -1094,17 +1094,13 @@ Option<bool> field::json_fields_to_fields(bool enable_nested_fields, nlohmann::j
}
if(!the_fields.empty() && !the_fields.back().embed.empty()) {
embed_json_field_indices.emplace_back(i, i);
auto validate_res = validate_and_init_embed_field(dummy_search_schema, field_json, fields_json, the_fields.back());
if(!validate_res.ok()) {
return validate_res;
}
}
}
const tsl::htrie_map<char, field> dummy_search_schema;
auto validation_op = field::validate_and_init_embed_fields(embed_json_field_indices, dummy_search_schema,
fields_json, the_fields);
if(!validation_op.ok()) {
return validation_op;
}
if(num_auto_detect_fields > 1) {
return Option<bool>(400,"There can be only one field named `.*`.");
}
@ -1112,49 +1108,47 @@ Option<bool> field::json_fields_to_fields(bool enable_nested_fields, nlohmann::j
return Option<bool>(true);
}
Option<bool> field::validate_and_init_embed_fields(const std::vector<std::pair<size_t, size_t>>& embed_json_field_indices,
const tsl::htrie_map<char, field>& search_schema,
nlohmann::json& fields_json,
std::vector<field>& fields_vec) {
for(const auto& json_field_index: embed_json_field_indices) {
auto& field_json = fields_json[json_field_index.first];
const std::string err_msg = "Property `" + fields::embed + "." + fields::from +
Option<bool> field::validate_and_init_embed_field(const tsl::htrie_map<char, field>& search_schema, nlohmann::json& field_json,
const nlohmann::json& fields_json,
field& the_field) {
const std::string err_msg = "Property `" + fields::embed + "." + fields::from +
"` can only refer to string or string array fields.";
for(auto& field_name : field_json[fields::embed][fields::from].get<std::vector<std::string>>()) {
auto embed_field = std::find_if(fields_json.begin(), fields_json.end(), [&field_name](const nlohmann::json& x) {
return x["name"].get<std::string>() == field_name;
});
for(auto& field_name : field_json[fields::embed][fields::from].get<std::vector<std::string>>()) {
if(embed_field == fields_json.end()) {
const auto& embed_field2 = search_schema.find(field_name);
if (embed_field2 == search_schema.end()) {
return Option<bool>(400, err_msg);
} else if (embed_field2->type != field_types::STRING && embed_field2->type != field_types::STRING_ARRAY) {
return Option<bool>(400, err_msg);
}
} else if((*embed_field)[fields::type] != field_types::STRING &&
(*embed_field)[fields::type] != field_types::STRING_ARRAY) {
auto embed_field = std::find_if(fields_json.begin(), fields_json.end(), [&field_name](const nlohmann::json& x) {
return x["name"].get<std::string>() == field_name;
});
if(embed_field == fields_json.end()) {
const auto& embed_field2 = search_schema.find(field_name);
if (embed_field2 == search_schema.end()) {
return Option<bool>(400, err_msg);
} else if (embed_field2->type != field_types::STRING && embed_field2->type != field_types::STRING_ARRAY) {
return Option<bool>(400, err_msg);
}
} else if((*embed_field)[fields::type] != field_types::STRING &&
(*embed_field)[fields::type] != field_types::STRING_ARRAY) {
return Option<bool>(400, err_msg);
}
const auto& model_config = field_json[fields::embed][fields::model_config];
size_t num_dim = 0;
auto res = TextEmbedderManager::get_instance().validate_and_init_model(model_config, num_dim);
if(!res.ok()) {
return Option<bool>(res.code(), res.error());
}
LOG(INFO) << "Model init done.";
field_json[fields::num_dim] = num_dim;
fields_vec[json_field_index.second].num_dim = num_dim;
}
const auto& model_config = field_json[fields::embed][fields::model_config];
size_t num_dim = 0;
auto res = TextEmbedderManager::get_instance().validate_and_init_model(model_config, num_dim);
if(!res.ok()) {
return Option<bool>(res.code(), res.error());
}
LOG(INFO) << "Model init done.";
field_json[fields::num_dim] = num_dim;
the_field.num_dim = num_dim;
return Option<bool>(true);
}
void filter_result_t::and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) {
auto lenA = a.count, lenB = b.count;
if (lenA == 0 || lenB == 0) {

View File

@ -454,7 +454,7 @@ void Index::validate_and_preprocess(Index *index, std::vector<index_record>& ite
if(index_rec.is_update) {
// scrub string fields to reduce delete ops
get_doc_changes(index_rec.operation, search_schema, index_rec.doc, index_rec.old_doc,
get_doc_changes(index_rec.operation, embedding_fields, index_rec.doc, index_rec.old_doc,
index_rec.new_doc, index_rec.del_doc);
if(generate_embeddings) {
@ -870,12 +870,16 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
try {
const std::vector<float>& float_vals = record.doc[afield.name].get<std::vector<float>>();
if(afield.vec_dist == cosine) {
std::vector<float> normalized_vals(afield.num_dim);
hnsw_index_t::normalize_vector(float_vals, normalized_vals);
vec_index->addPoint(normalized_vals.data(), (size_t)record.seq_id, true);
if(float_vals.size() != afield.num_dim) {
record.index_failure(400, "Vector size mismatch.");
} else {
vec_index->addPoint(float_vals.data(), (size_t)record.seq_id, true);
if(afield.vec_dist == cosine) {
std::vector<float> normalized_vals(afield.num_dim);
hnsw_index_t::normalize_vector(float_vals, normalized_vals);
vec_index->addPoint(normalized_vals.data(), (size_t)record.seq_id, true);
} else {
vec_index->addPoint(float_vals.data(), (size_t)record.seq_id, true);
}
}
} catch(const std::exception &e) {
record.index_failure(400, e.what());
@ -3200,8 +3204,8 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
for(size_t res_index = 0; res_index < vec_results.size(); res_index++) {
auto& vec_result = vec_results[res_index];
auto doc_id = vec_result.first;
auto result_it = topster->kv_map.find(doc_id);
auto seq_id = vec_result.first;
auto result_it = topster->kv_map.find(seq_id);
if(result_it != topster->kv_map.end()) {
if(result_it->second->match_score_index < 0 || result_it->second->match_score_index > 2) {
@ -3210,22 +3214,23 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
// result overlaps with keyword search: we have to combine the scores
auto result = result_it->second;
KV* kv = result_it->second;
// old_score + (1 / rank_of_document) * WEIGHT)
result->vector_distance = vec_result.second;
result->text_match_score = result->scores[result->match_score_index];
kv->vector_distance = vec_result.second;
kv->text_match_score = kv->scores[kv->match_score_index];
int64_t match_score = float_to_int64_t(
(int64_t_to_float(result->scores[result->match_score_index])) +
(int64_t_to_float(kv->scores[kv->match_score_index])) +
((1.0 / (res_index + 1)) * VECTOR_SEARCH_WEIGHT));
int64_t match_score_index = -1;
int64_t scores[3] = {0};
compute_sort_scores(sort_fields_std, sort_order, field_values, geopoint_indices, doc_id, 0, match_score, scores, match_score_index, vec_result.second);
compute_sort_scores(sort_fields_std, sort_order, field_values, geopoint_indices, seq_id, 0,
match_score, scores, match_score_index, vec_result.second);
for(int i = 0; i < 3; i++) {
result->scores[i] = scores[i];
kv->scores[i] = scores[i];
}
result->match_score_index = match_score_index;
kv->match_score_index = match_score_index;
} else {
// Result has been found only in vector search: we have to add it to both KV and result_ids
@ -3233,12 +3238,21 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
int64_t scores[3] = {0};
int64_t match_score = float_to_int64_t((1.0 / (res_index + 1)) * VECTOR_SEARCH_WEIGHT);
int64_t match_score_index = -1;
compute_sort_scores(sort_fields_std, sort_order, field_values, geopoint_indices, doc_id, 0, match_score, scores, match_score_index, vec_result.second);
KV kv(searched_queries.size(), doc_id, doc_id, match_score_index, scores);
compute_sort_scores(sort_fields_std, sort_order, field_values, geopoint_indices, seq_id, 0, match_score, scores, match_score_index, vec_result.second);
uint64_t distinct_id = seq_id;
if (group_limit != 0) {
distinct_id = get_distinct_id(group_by_fields, seq_id);
if(excluded_group_ids.count(distinct_id) != 0) {
continue;
}
}
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
kv.text_match_score = 0;
kv.vector_distance = vec_result.second;
topster->add(&kv);
vec_search_ids.push_back(doc_id);
vec_search_ids.push_back(seq_id);
}
}
@ -3967,8 +3981,6 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
dropped_token_its.push_back(std::move(token_fields));
}
// one iterator for each token, each underlying iterator contains results of token across multiple fields
std::vector<or_iterator_t> token_its;
@ -4060,6 +4072,28 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
}
}
size_t query_len = query_tokens.size();
// check if seq_id exists in any of the dropped_token iters
for(size_t ti = 0; ti < dropped_token_its.size(); ti++) {
or_iterator_t& token_fields_iters = dropped_token_its[ti];
if(token_fields_iters.skip_to(seq_id) && token_fields_iters.id() == seq_id) {
query_len++;
const std::vector<posting_list_t::iterator_t>& field_iters = token_fields_iters.get_its();
for(size_t fi = 0; fi < field_iters.size(); fi++) {
const posting_list_t::iterator_t& field_iter = field_iters[fi];
if(field_iter.id() == seq_id) {
// not all fields might contain a given token
field_to_tokens[field_iter.get_field_id()].push_back(field_iter.clone());
}
}
}
}
if(syn_orig_num_tokens != -1) {
query_len = syn_orig_num_tokens;
}
int64_t best_field_match_score = 0, best_field_weight = 0;
uint32_t num_matching_fields = 0;
@ -4113,18 +4147,6 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
compute_sort_scores(sort_fields, sort_order, field_values, geopoint_indices, seq_id, filter_index,
best_field_match_score, scores, match_score_index);
size_t query_len = query_tokens.size();
// check if seq_id exists in any of the dropped_token iters and increment matching fields accordingly
for(auto& dropped_token_it: dropped_token_its) {
if(dropped_token_it.skip_to(seq_id) && dropped_token_it.id() == seq_id) {
query_len++;
}
}
if(syn_orig_num_tokens != -1) {
query_len = syn_orig_num_tokens;
}
query_len = std::min<size_t>(15, query_len);
// NOTE: `query_len` is total tokens matched across fields.
@ -6244,7 +6266,7 @@ void Index::handle_doc_ops(const tsl::htrie_map<char, field>& search_schema,
}
}
void Index::get_doc_changes(const index_operation_t op, const tsl::htrie_map<char, field>& search_schema,
void Index::get_doc_changes(const index_operation_t op, const tsl::htrie_map<char, field>& embedding_fields,
nlohmann::json& update_doc, const nlohmann::json& old_doc, nlohmann::json& new_doc,
nlohmann::json& del_doc) {
@ -6257,7 +6279,12 @@ void Index::get_doc_changes(const index_operation_t op, const tsl::htrie_map<cha
}
if(!update_doc.contains(it.key())) {
del_doc[it.key()] = it.value();
// embedding field won't be part of upsert doc so populate new doc with the value from old doc
if(embedding_fields.count(it.key()) != 0) {
new_doc[it.key()] = it.value();
} else {
del_doc[it.key()] = it.value();
}
}
}
} else {
@ -6311,9 +6338,10 @@ size_t Index::num_seq_ids() const {
Option<bool> Index::seq_ids_outside_top_k(const std::string& field_name, size_t k,
std::vector<uint32_t>& outside_seq_ids) {
std::shared_lock lock(mutex);
auto field_it = numerical_index.find(field_name);
if(field_it == sort_index.end()) {
if(field_it == numerical_index.end()) {
return Option<bool>(400, "Field not found in numerical index.");
}

View File

@ -117,6 +117,11 @@ embedding_res_t TextEmbedder::Embed(const std::string& text, const size_t remote
input_shapes.push_back({1, static_cast<int64_t>(encoded_input.input_ids.size())});
input_shapes.push_back({1, static_cast<int64_t>(encoded_input.attention_mask.size())});
if(session_->GetInputCount() == 3) {
// edge case: xlm_roberta does not have token_type_ids, but if the model has it as input, we need to fill it with 0s
if(encoded_input.token_type_ids.size() == 0) {
encoded_input.token_type_ids.resize(encoded_input.input_ids.size(), 0);
}
input_shapes.push_back({1, static_cast<int64_t>(encoded_input.token_type_ids.size())});
}
input_tensors.push_back(Ort::Value::CreateTensor<int64_t>(memory_info, encoded_input.input_ids.data(), encoded_input.input_ids.size(), input_shapes[0].data(), input_shapes[0].size()));

View File

@ -43,9 +43,10 @@ Option<bool> TextEmbedderManager::validate_and_init_remote_model(const nlohmann:
}
std::unique_lock<std::mutex> lock(text_embedders_mutex);
auto text_embedder_it = text_embedders.find(model_name);
std::string model_key = is_remote_model(model_name) ? RemoteEmbedder::get_model_key(model_config) : model_name;
auto text_embedder_it = text_embedders.find(model_key);
if(text_embedder_it == text_embedders.end()) {
text_embedders.emplace(model_name, std::make_shared<TextEmbedder>(model_config, num_dims));
text_embedders.emplace(model_key, std::make_shared<TextEmbedder>(model_config, num_dims));
}
return Option<bool>(true);
@ -122,7 +123,8 @@ Option<bool> TextEmbedderManager::validate_and_init_local_model(const nlohmann::
Option<TextEmbedder*> TextEmbedderManager::get_text_embedder(const nlohmann::json& model_config) {
std::unique_lock<std::mutex> lock(text_embedders_mutex);
const std::string& model_name = model_config.at("model_name");
auto text_embedder_it = text_embedders.find(model_name);
std::string model_key = is_remote_model(model_name) ? RemoteEmbedder::get_model_key(model_config) : model_name;
auto text_embedder_it = text_embedders.find(model_key);
if(text_embedder_it == text_embedders.end()) {
return Option<TextEmbedder*>(404, "Text embedder was not found.");

View File

@ -53,6 +53,21 @@ long RemoteEmbedder::call_remote_api(const std::string& method, const std::strin
proxy_call_timeout_ms, true);
}
// Dispatches model-key construction to the concrete remote embedder type,
// selected by the namespace prefix of the configured model name.
// The returned key is used to index the text-embedder cache; an empty
// string is returned for an unrecognized namespace.
const std::string RemoteEmbedder::get_model_key(const nlohmann::json& model_config) {
    const std::string model_name = model_config["model_name"].get<std::string>();
    const std::string ns = TextEmbedderManager::get_model_namespace(model_name);

    if(ns == "openai") {
        return OpenAIEmbedder::get_model_key(model_config);
    }

    if(ns == "google") {
        return GoogleEmbedder::get_model_key(model_config);
    }

    if(ns == "gcp") {
        return GCPEmbedder::get_model_key(model_config);
    }

    return "";
}
OpenAIEmbedder::OpenAIEmbedder(const std::string& openai_model_path, const std::string& api_key) : api_key(api_key), openai_model_path(openai_model_path) {
}
@ -206,6 +221,7 @@ std::vector<embedding_res_t> OpenAIEmbedder::batch_embed(const std::vector<std::
}
nlohmann::json res_json;
try {
res_json = nlohmann::json::parse(res);
} catch (const std::exception& e) {
@ -217,8 +233,21 @@ std::vector<embedding_res_t> OpenAIEmbedder::batch_embed(const std::vector<std::
}
return outputs;
}
if(res_json.count("data") == 0 || !res_json["data"].is_array() || res_json["data"].size() != inputs.size()) {
std::vector<embedding_res_t> outputs;
for(size_t i = 0; i < inputs.size(); i++) {
outputs.push_back(embedding_res_t(500, "Got malformed response from OpenAI API."));
}
return outputs;
}
std::vector<embedding_res_t> outputs;
for(auto& data : res_json["data"]) {
if(data.count("embedding") == 0 || !data["embedding"].is_array() || data["embedding"].size() == 0) {
outputs.push_back(embedding_res_t(500, "Got malformed response from OpenAI API."));
continue;
}
outputs.push_back(embedding_res_t(data["embedding"].get<std::vector<float>>()));
}
@ -255,6 +284,9 @@ nlohmann::json OpenAIEmbedder::get_error_json(const nlohmann::json& req_body, lo
return embedding_res;
}
// Builds the cache key identifying this OpenAI embedder instance:
// "<model_name>:<api_key>", so the same model configured with different
// credentials maps to distinct embedder instances.
std::string OpenAIEmbedder::get_model_key(const nlohmann::json& model_config) {
    std::string key = model_config["model_name"].get<std::string>();
    key += ":";
    key += model_config["api_key"].get<std::string>();
    return key;
}
GoogleEmbedder::GoogleEmbedder(const std::string& google_api_key) : google_api_key(google_api_key) {
@ -372,6 +404,10 @@ nlohmann::json GoogleEmbedder::get_error_json(const nlohmann::json& req_body, lo
return embedding_res;
}
// Builds the cache key identifying this Google embedder instance:
// "<model_name>:<api_key>", so the same model configured with different
// credentials maps to distinct embedder instances.
std::string GoogleEmbedder::get_model_key(const nlohmann::json& model_config) {
    std::string key = model_config["model_name"].get<std::string>();
    key += ":";
    key += model_config["api_key"].get<std::string>();
    return key;
}
GCPEmbedder::GCPEmbedder(const std::string& project_id, const std::string& model_name, const std::string& access_token,
const std::string& refresh_token, const std::string& client_id, const std::string& client_secret) :
@ -555,7 +591,20 @@ std::vector<embedding_res_t> GCPEmbedder::batch_embed(const std::vector<std::str
return outputs;
}
std::vector<embedding_res_t> outputs;
if(res_json.count("predictions") == 0 || !res_json["predictions"].is_array() || res_json["predictions"].size() != inputs.size()) {
std::vector<embedding_res_t> outputs;
for(size_t i = 0; i < inputs.size(); i++) {
outputs.push_back(embedding_res_t(500, "Got malformed response from GCP API."));
}
return outputs;
}
for(const auto& prediction : res_json["predictions"]) {
if(prediction.count("embeddings") == 0 || !prediction["embeddings"].is_object() || prediction["embeddings"].count("values") == 0 || !prediction["embeddings"]["values"].is_array() || prediction["embeddings"]["values"].size() == 0) {
outputs.push_back(embedding_res_t(500, "Got malformed response from GCP API."));
continue;
}
outputs.push_back(embedding_res_t(prediction["embeddings"]["values"].get<std::vector<float>>()));
}
@ -625,3 +674,7 @@ Option<std::string> GCPEmbedder::generate_access_token(const std::string& refres
return Option<std::string>(access_token);
}
// Builds the cache key identifying this GCP embedder instance:
// "<model_name>:<project_id>:<client_secret>", so the same model used
// under different projects/credentials maps to distinct embedder instances.
std::string GCPEmbedder::get_model_key(const nlohmann::json& model_config) {
    std::string key = model_config["model_name"].get<std::string>();
    key.append(":").append(model_config["project_id"].get<std::string>());
    key.append(":").append(model_config["client_secret"].get<std::string>());
    return key;
}

View File

@ -1,5 +1,6 @@
#include <sstream>
#include <algorithm>
#include <string_utils.h>
#include "tokenizer.h"
Tokenizer::Tokenizer(const std::string& input, bool normalize, bool no_op, const std::string& locale,
@ -349,10 +350,15 @@ bool Tokenizer::should_skip_char(char c) {
return is_ascii_char(c) && get_stream_mode(c) != INDEX;
}
void Tokenizer::normalize_ascii(std::string& text) {
for(size_t i = 0; i < text.size(); i++) {
std::string Tokenizer::normalize_ascii_no_spaces(const std::string& text) {
std::string analytics_query = text;
StringUtils::trim(analytics_query);
for(size_t i = 0; i < analytics_query.size(); i++) {
if(is_ascii_char(text[i])) {
text[i] = std::tolower(text[i]);
analytics_query[i] = std::tolower(analytics_query[i]);
}
}
return analytics_query;
}

View File

@ -118,7 +118,7 @@ int init_root_logger(Config & config, const std::string & server_version) {
if(log_dir.empty()) {
// use console logger if log dir is not specified
FLAGS_logtostdout = true;
FLAGS_logtostderr = true;
} else {
if(!directory_exists(log_dir)) {
std::cerr << "Typesense failed to start. " << "Log directory " << log_dir << " does not exist.";

View File

@ -3,6 +3,7 @@
#include <vector>
#include <fstream>
#include <collection_manager.h>
#include <analytics_manager.h>
#include "string_utils.h"
#include "collection.h"
@ -24,6 +25,8 @@ protected:
collectionManager.init(store, 1.0, "auth_key", quit);
collectionManager.load(8, 1000);
AnalyticsManager::get_instance().init(store);
schema = R"({
"name": "collection1",
"enable_nested_fields": true,
@ -587,6 +590,67 @@ TEST_F(CollectionManagerTest, VerifyEmbeddedParametersOfScopedAPIKey) {
collectionManager.drop_collection("coll1");
}
// Verifies that queries recorded by the analytics manager for popular-query
// suggestions are trimmed of surrounding whitespace (" tom " -> "tom",
// " " -> "").
TEST_F(CollectionManagerTest, QuerySuggestionsShouldBeTrimmed) {
std::vector<field> fields = {field("title", field_types::STRING, false, false, true, "", -1, 1),
field("year", field_types::INT32, false),
field("points", field_types::INT32, false),};
Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
nlohmann::json doc1;
doc1["id"] = "0";
doc1["title"] = "Tom Sawyer";
doc1["year"] = 1876;
doc1["points"] = 100;
ASSERT_TRUE(coll1->add(doc1.dump()).ok());
// search analytics must be enabled for suggestions to be collected
Config::get_instance().set_enable_search_analytics(true);
nlohmann::json analytics_rule = R"({
"name": "top_search_queries",
"type": "popular_queries",
"params": {
"limit": 100,
"source": {
"collections": ["coll1"]
},
"destination": {
"collection": "top_queries"
}
}
})"_json;
auto create_op = AnalyticsManager::get_instance().create_rule(analytics_rule, false, true);
ASSERT_TRUE(create_op.ok());
nlohmann::json embedded_params;
std::map<std::string, std::string> req_params;
req_params["collection"] = "coll1";
// query padded with spaces on both sides
req_params["q"] = " tom ";
req_params["query_by"] = "title";
std::string json_res;
auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
ASSERT_TRUE(search_op.ok());
json_res.clear();
// whitespace-only query should be recorded as an empty string
req_params["q"] = " ";
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
ASSERT_TRUE(search_op.ok());
// check that suggestions have been trimmed
auto popular_queries = AnalyticsManager::get_instance().get_popular_queries();
ASSERT_EQ(2, popular_queries["top_queries"]->get_user_prefix_queries()[""].size());
ASSERT_EQ("tom", popular_queries["top_queries"]->get_user_prefix_queries()[""][0].query);
ASSERT_EQ("", popular_queries["top_queries"]->get_user_prefix_queries()[""][1].query);
collectionManager.drop_collection("coll1");
}
TEST_F(CollectionManagerTest, RestoreAutoSchemaDocsOnRestart) {
Collection *coll1;

View File

@ -1566,6 +1566,14 @@ TEST_F(CollectionSchemaChangeTest, UpdateSchemaWithNewEmbeddingField) {
ASSERT_TRUE(res.ok());
ASSERT_EQ(1, coll->get_embedding_fields().size());
auto search_schema = coll->get_schema();
auto embedding_field_it = search_schema.find("embedding");
ASSERT_TRUE(embedding_field_it != coll->get_schema().end());
ASSERT_EQ("embedding", embedding_field_it.value().name);
ASSERT_EQ("float[]", embedding_field_it.value().type);
ASSERT_EQ(384, embedding_field_it.value().num_dim);
nlohmann::json doc;
doc["names"] = {"hello", "world"};
auto add_op = coll->add(doc.dump());
@ -1580,9 +1588,13 @@ TEST_F(CollectionSchemaChangeTest, DropFieldUsedForEmbedding) {
nlohmann::json schema = R"({
"name": "objects",
"fields": [
{"name": "names", "type": "string[]"},
{"name": "category", "type":"string"},
{"name": "embedding", "type":"float[]", "embed":{"from": ["names","category"], "model_config": {"model_name": "ts/e5-small"}}}
{"name": "title", "type": "string"},
{"name": "names", "type": "string[]"},
{"name": "category", "type":"string"},
{"name": "embedding", "type":"float[]", "embed":{"from": ["names","category"],
"model_config": {"model_name": "ts/e5-small"}}},
{"name": "embedding2", "type":"float[]", "embed":{"from": ["names"],
"model_config": {"model_name": "ts/e5-small"}}}
]
})"_json;
@ -1594,20 +1606,28 @@ TEST_F(CollectionSchemaChangeTest, DropFieldUsedForEmbedding) {
LOG(INFO) << "Created collection";
auto embedding_fields = coll->get_embedding_fields();
ASSERT_EQ(2, embedding_fields.size());
ASSERT_EQ(2, embedding_fields["embedding"].embed[fields::from].get<std::vector<std::string>>().size());
ASSERT_EQ(1, embedding_fields["embedding2"].embed[fields::from].get<std::vector<std::string>>().size());
auto coll_schema = coll->get_schema();
ASSERT_EQ(5, coll_schema.size());
auto the_fields = coll->get_fields();
ASSERT_EQ(5, the_fields.size());
auto schema_changes = R"({
"fields": [
{"name": "names", "drop": true}
]
})"_json;
auto embedding_fields = coll->get_embedding_fields();
ASSERT_EQ(2, embedding_fields["embedding"].embed[fields::from].get<std::vector<std::string>>().size());
auto alter_op = coll->alter(schema_changes);
ASSERT_TRUE(alter_op.ok());
embedding_fields = coll->get_embedding_fields();
ASSERT_EQ(1, embedding_fields.size());
ASSERT_EQ(1, embedding_fields["embedding"].embed[fields::from].get<std::vector<std::string>>().size());
ASSERT_EQ("category", embedding_fields["embedding"].embed[fields::from].get<std::vector<std::string>>()[0]);
@ -1623,6 +1643,16 @@ TEST_F(CollectionSchemaChangeTest, DropFieldUsedForEmbedding) {
embedding_fields = coll->get_embedding_fields();
ASSERT_EQ(0, embedding_fields.size());
ASSERT_EQ(0, coll->_get_index()->_get_vector_index().size());
// only title remains
coll_schema = coll->get_schema();
ASSERT_EQ(1, coll_schema.size());
ASSERT_EQ("title", coll_schema["title"].name);
the_fields = coll->get_fields();
ASSERT_EQ(1, the_fields.size());
ASSERT_EQ("title", the_fields[0].name);
}
TEST_F(CollectionSchemaChangeTest, EmbeddingFieldsMapTest) {

View File

@ -1816,6 +1816,36 @@ TEST_F(CollectionSpecificMoreTest, ConsiderDroppedTokensDuringTextMatchScoring)
ASSERT_EQ("1", res["hits"][1]["document"]["id"].get<std::string>());
}
// Text-match scoring with dropped tokens: for the query "avène eau mineral"
// the test expects doc "1" (matching "Avène" and "Mineral") to rank above
// doc "0" (matching "Eau").
TEST_F(CollectionSpecificMoreTest, ConsiderDroppedTokensDuringTextMatchScoring2) {
nlohmann::json schema = R"({
"name": "coll1",
"fields": [
{"name": "name", "type": "string"}
]
})"_json;
Collection *coll1 = collectionManager.create_collection(schema).get();
nlohmann::json doc;
doc["id"] = "0";
doc["name"] = "Elizabeth Arden 5th Avenue Eau de Parfum 125ml";
ASSERT_TRUE(coll1->add(doc.dump()).ok());
doc["id"] = "1";
doc["name"] = "Avène Sun Very High Protection Mineral Cream SPF50+ 50ml";
ASSERT_TRUE(coll1->add(doc.dump()).ok());
// drop_tokens_threshold = 5, so tokens may be dropped to find matches
auto res = coll1->search("avène eau mineral", {"name"}, "", {}, {}, {2}, 10, 1, FREQUENCY, {true}, 5,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
"<mark>", "</mark>", {3}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 0, 0, 0, 2, false, "", true, 0, max_weight).get();
ASSERT_EQ(2, res["hits"].size());
ASSERT_EQ("1", res["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("0", res["hits"][1]["document"]["id"].get<std::string>());
}
TEST_F(CollectionSpecificMoreTest, NonNestedFieldNameWithDot) {
nlohmann::json schema = R"({
"name": "coll1",

View File

@ -224,6 +224,137 @@ TEST_F(CollectionVectorTest, BasicVectorQuerying) {
collectionManager.drop_collection("coll1");
}
// Re-indexing an unchanged document via UPSERT and EMPLACE must keep the
// document findable through vector search (the vector index entry survives
// the rewrite).
TEST_F(CollectionVectorTest, VectorUnchangedUpsert) {
nlohmann::json schema = R"({
"name": "coll1",
"fields": [
{"name": "title", "type": "string"},
{"name": "points", "type": "int32"},
{"name": "vec", "type": "float[]", "num_dim": 3}
]
})"_json;
Collection* coll1 = collectionManager.create_collection(schema).get();
std::vector<float> vec = {0.12, 0.45, 0.64};
nlohmann::json doc;
doc["id"] = "0";
doc["title"] = "Title";
doc["points"] = 100;
doc["vec"] = vec;
auto add_op = coll1->add(doc.dump());
ASSERT_TRUE(add_op.ok());
auto results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 32767, 32767, 2,
false, true, "vec:([0.12, 0.44, 0.55])").get();
ASSERT_EQ(1, results["found"].get<size_t>());
// upsert unchanged doc
add_op = coll1->add(doc.dump(), index_operation_t::UPSERT);
ASSERT_TRUE(add_op.ok());
results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 32767, 32767, 2,
false, true, "vec:([0.12, 0.44, 0.55])").get();
ASSERT_EQ(1, results["found"].get<size_t>());
// emplace unchanged doc
add_op = coll1->add(doc.dump(), index_operation_t::EMPLACE);
ASSERT_TRUE(add_op.ok());
results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 32767, 32767, 2,
false, true, "vec:([0.12, 0.44, 0.55])").get();
ASSERT_EQ(1, results["found"].get<size_t>());
}
// Partial updates (EMPLACE / UPDATE) that omit the "vec" field must preserve
// the previously indexed vector, so the doc remains reachable via vector
// search afterwards.
TEST_F(CollectionVectorTest, VectorPartialUpdate) {
nlohmann::json schema = R"({
"name": "coll1",
"fields": [
{"name": "title", "type": "string"},
{"name": "points", "type": "int32"},
{"name": "vec", "type": "float[]", "num_dim": 3}
]
})"_json;
Collection* coll1 = collectionManager.create_collection(schema).get();
std::vector<float> vec = {0.12, 0.45, 0.64};
nlohmann::json doc;
doc["id"] = "0";
doc["title"] = "Title";
doc["points"] = 100;
doc["vec"] = vec;
auto add_op = coll1->add(doc.dump());
ASSERT_TRUE(add_op.ok());
auto results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 32767, 32767, 2,
false, true, "vec:([0.12, 0.44, 0.55])").get();
ASSERT_EQ(1, results["found"].get<size_t>());
// emplace partial doc (without "vec")
doc.erase("vec");
doc["title"] = "Random";
add_op = coll1->add(doc.dump(), index_operation_t::EMPLACE);
ASSERT_TRUE(add_op.ok());
results = coll1->search("Random", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 32767, 32767, 2,
false, true, "vec:([0.12, 0.44, 0.55])").get();
ASSERT_EQ(1, results["found"].get<size_t>());
// update partial doc (without "vec")
doc.erase("vec");
doc["title"] = "Random";
add_op = coll1->add(doc.dump(), index_operation_t::UPDATE);
ASSERT_TRUE(add_op.ok());
results = coll1->search("Random", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 32767, 32767, 2,
false, true, "vec:([0.12, 0.44, 0.55])").get();
ASSERT_EQ(1, results["found"].get<size_t>());
}
TEST_F(CollectionVectorTest, NumVectorGreaterThanNumDim) {
nlohmann::json schema = R"({
"name": "coll1",
@ -692,6 +823,88 @@ TEST_F(CollectionVectorTest, VectorWithNullValue) {
nlohmann::json::parse(json_lines[1])["error"].get<std::string>());
}
// UPSERT / UPDATE / EMPLACE of a document whose embedding-source field
// ("title") is unchanged must still yield a 384-dim embedding in the stored
// document afterwards.
TEST_F(CollectionVectorTest, EmbeddedVectorUnchangedUpsert) {
nlohmann::json schema = R"({
"name": "coll1",
"fields": [
{"name": "title", "type": "string"},
{"name": "points", "type": "int32"},
{"name": "embedding", "type":"float[]", "embed":{"from": ["title"],
"model_config": {"model_name": "ts/e5-small"}}}
]
})"_json;
TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
Collection* coll1 = collectionManager.create_collection(schema).get();
nlohmann::json doc;
doc["id"] = "0";
doc["title"] = "Title";
doc["points"] = 100;
auto add_op = coll1->add(doc.dump());
ASSERT_TRUE(add_op.ok());
auto results = coll1->search("title", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>()).get();
ASSERT_EQ(1, results["found"].get<size_t>());
auto embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
// ts/e5-small produces 384-dim embeddings
ASSERT_EQ(384, embedding.size());
// upsert unchanged doc
doc.clear();
doc["id"] = "0";
doc["title"] = "Title";
doc["points"] = 100;
add_op = coll1->add(doc.dump(), index_operation_t::UPSERT);
ASSERT_TRUE(add_op.ok());
results = coll1->search("title", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>()).get();
ASSERT_EQ(1, results["found"].get<size_t>());
embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
ASSERT_EQ(384, embedding.size());
// update
doc.clear();
doc["id"] = "0";
doc["title"] = "Title";
doc["points"] = 100;
add_op = coll1->add(doc.dump(), index_operation_t::UPDATE);
ASSERT_TRUE(add_op.ok());
results = coll1->search("title", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>()).get();
ASSERT_EQ(1, results["found"].get<size_t>());
embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
ASSERT_EQ(384, embedding.size());
// emplace
doc.clear();
doc["id"] = "0";
doc["title"] = "Title";
doc["points"] = 100;
add_op = coll1->add(doc.dump(), index_operation_t::EMPLACE);
ASSERT_TRUE(add_op.ok());
results = coll1->search("title", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>()).get();
ASSERT_EQ(1, results["found"].get<size_t>());
embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
ASSERT_EQ(384, embedding.size());
}
TEST_F(CollectionVectorTest, HybridSearchWithExplicitVector) {
nlohmann::json schema = R"({
"name": "objects",
@ -1099,7 +1312,67 @@ TEST_F(CollectionVectorTest, HideCredential) {
ASSERT_EQ("***********", coll_summary["fields"][1]["embed"]["model_config"]["project_id"].get<std::string>());
}
TEST_F(CollectionVectorTest, UpdateOfCollWithNonOptionalEmbeddingField) {
// When the field an embedding is generated from ("name") changes via
// EMPLACE / UPDATE / UPSERT, the stored embedding must be regenerated
// (i.e. differ from the original embedding).
TEST_F(CollectionVectorTest, UpdateOfFieldReferencedByEmbedding) {
nlohmann::json schema = R"({
"name": "objects",
"fields": [
{"name": "name", "type": "string"},
{"name": "embedding", "type":"float[]", "embed":{"from": ["name"],
"model_config": {"model_name": "ts/e5-small"}}}
]
})"_json;
TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
auto op = collectionManager.create_collection(schema);
ASSERT_TRUE(op.ok());
Collection* coll = op.get();
nlohmann::json object;
object["id"] = "0";
object["name"] = "butter";
auto add_op = coll->add(object.dump(), CREATE);
ASSERT_TRUE(add_op.ok());
auto results = coll->search("butter", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
ASSERT_EQ(1, results["found"].get<size_t>());
auto original_embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
nlohmann::json update_object;
update_object["id"] = "0";
update_object["name"] = "ghee";
auto update_op = coll->add(update_object.dump(), EMPLACE);
ASSERT_TRUE(update_op.ok());
results = coll->search("ghee", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
ASSERT_EQ(1, results["found"].get<size_t>());
auto updated_embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
// embedding must have been recomputed for the new "name" value
ASSERT_NE(original_embedding, updated_embedding);
// action = update
update_object["name"] = "milk";
update_op = coll->add(update_object.dump(), UPDATE);
ASSERT_TRUE(update_op.ok());
results = coll->search("milk", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
ASSERT_EQ(1, results["found"].get<size_t>());
updated_embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
ASSERT_NE(original_embedding, updated_embedding);
// action = upsert
update_object["name"] = "cheese";
update_op = coll->add(update_object.dump(), UPSERT);
ASSERT_TRUE(update_op.ok());
results = coll->search("cheese", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
ASSERT_EQ(1, results["found"].get<size_t>());
updated_embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
ASSERT_NE(original_embedding, updated_embedding);
}
TEST_F(CollectionVectorTest, UpdateOfFieldNotReferencedByEmbedding) {
// test updates to a field that's not referred by an embedding field
nlohmann::json schema = R"({
"name": "objects",
"fields": [
@ -1123,16 +1396,34 @@ TEST_F(CollectionVectorTest, UpdateOfCollWithNonOptionalEmbeddingField) {
auto add_op = coll->add(object.dump(), CREATE);
ASSERT_TRUE(add_op.ok());
auto results = coll->search("butter", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
ASSERT_EQ(1, results["found"].get<size_t>());
nlohmann::json update_object;
update_object["id"] = "0";
update_object["about"] = "something about butter";
auto update_op = coll->add(update_object.dump(), EMPLACE);
ASSERT_TRUE(update_op.ok());
results = coll->search("butter", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
ASSERT_EQ(1, results["found"].get<size_t>());
// action = update
update_object["about"] = "something about butter 2";
update_op = coll->add(update_object.dump(), UPDATE);
ASSERT_TRUE(update_op.ok());
results = coll->search("butter", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
ASSERT_EQ(1, results["found"].get<size_t>());
// action = upsert
update_object["name"] = "butter";
update_object["about"] = "something about butter 3";
update_op = coll->add(update_object.dump(), UPSERT);
ASSERT_TRUE(update_op.ok());
results = coll->search("butter", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
ASSERT_EQ(1, results["found"].get<size_t>());
}
TEST_F(CollectionVectorTest, FreshEmplaceWithOptionalEmbeddingReferencedField) {
@ -1161,6 +1452,27 @@ TEST_F(CollectionVectorTest, FreshEmplaceWithOptionalEmbeddingReferencedField) {
"or make the embedding field optional.", add_op.error());
}
// Regression test: an explicit "id" field listed before the embedding field
// must not break embedding-field initialization. Only 2 fields are expected
// back (presumably "id" is not materialized as a regular field — confirm
// against Collection::get_fields), and the embedding field must get its
// 384 dims from the ts/e5-small model.
TEST_F(CollectionVectorTest, EmbeddingFieldWithIdFieldPrecedingInSchema) {
auto schema = R"({
"name": "objects",
"fields": [
{"name": "id", "type": "string"},
{"name": "name", "type": "string"},
{"name": "embedding", "type":"float[]", "embed":{"from": ["name"], "model_config": {"model_name": "ts/e5-small"}}}
]
})"_json;
TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
auto op = collectionManager.create_collection(schema);
ASSERT_TRUE(op.ok());
Collection* coll = op.get();
auto fs = coll->get_fields();
ASSERT_EQ(2, fs.size());
ASSERT_EQ(384, fs[1].num_dim);
}
TEST_F(CollectionVectorTest, SkipEmbeddingOpWhenValueExists) {
nlohmann::json schema = R"({
"name": "objects",
@ -1306,6 +1618,58 @@ TEST_F(CollectionVectorTest, KeywordSearchReturnOnlyTextMatchInfo) {
ASSERT_EQ(1, results["hits"][0].count("text_match_info"));
}
// Group-by combined with vector search: all three docs share group "0", so a
// single grouped hit is expected, and each grouped hit must carry a
// "vector_distance" key — both for a keyword query and for a wildcard query.
TEST_F(CollectionVectorTest, GroupByWithVectorSearch) {
nlohmann::json schema = R"({
"name": "coll1",
"fields": [
{"name": "title", "type": "string"},
{"name": "group", "type": "string", "facet": true},
{"name": "vec", "type": "float[]", "num_dim": 4}
]
})"_json;
Collection* coll1 = collectionManager.create_collection(schema).get();
std::vector<std::vector<float>> values = {
{0.851758, 0.909671, 0.823431, 0.372063},
{0.97826, 0.933157, 0.39557, 0.306488},
{0.230606, 0.634397, 0.514009, 0.399594}
};
for (size_t i = 0; i < values.size(); i++) {
nlohmann::json doc;
doc["id"] = std::to_string(i);
doc["title"] = std::to_string(i) + " title";
doc["group"] = "0";
doc["vec"] = values[i];
ASSERT_TRUE(coll1->add(doc.dump()).ok());
}
// keyword query + vector query, grouped by "group" with group_limit 1
auto res = coll1->search("title", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {"group"}, 1,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 32767, 32767, 2,
false, true, "vec:([0.96826, 0.94, 0.39557, 0.306488])").get();
ASSERT_EQ(1, res["grouped_hits"].size());
ASSERT_EQ(1, res["grouped_hits"][0]["hits"].size());
ASSERT_EQ(1, res["grouped_hits"][0]["hits"][0].count("vector_distance"));
// same expectations with a wildcard query
res = coll1->search("*", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {"group"}, 1,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 32767, 32767, 2,
false, true, "vec:([0.96826, 0.94, 0.39557, 0.306488])").get();
ASSERT_EQ(1, res["grouped_hits"].size());
ASSERT_EQ(1, res["grouped_hits"][0]["hits"].size());
ASSERT_EQ(1, res["grouped_hits"][0]["hits"][0].count("vector_distance"));
}
TEST_F(CollectionVectorTest, HybridSearchReturnAllInfo) {
auto schema_json =
R"({
@ -1342,3 +1706,135 @@ TEST_F(CollectionVectorTest, HybridSearchReturnAllInfo) {
ASSERT_EQ(1, results["hits"][0].count("text_match_info"));
ASSERT_EQ(1, results["hits"][0].count("hybrid_search_info"));
}
// DISABLED: hybrid search sorted by _text_match:desc,_vector_distance:asc is
// expected to return the same top-4 ordering as a pure keyword search.
TEST_F(CollectionVectorTest, DISABLED_HybridSortingTest) {
auto schema_json =
R"({
"name": "TEST",
"fields": [
{"name": "name", "type": "string"},
{"name": "embedding", "type":"float[]", "embed":{"from": ["name"], "model_config": {"model_name": "ts/e5-small"}}}
]
})"_json;
TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
auto collection_create_op = collectionManager.create_collection(schema_json);
ASSERT_TRUE(collection_create_op.ok());
auto coll1 = collection_create_op.get();
auto add_op = coll1->add(R"({
"name": "john doe"
})"_json.dump());
ASSERT_TRUE(add_op.ok());
add_op = coll1->add(R"({
"name": "john legend"
})"_json.dump());
ASSERT_TRUE(add_op.ok());
add_op = coll1->add(R"({
"name": "john krasinski"
})"_json.dump());
ASSERT_TRUE(add_op.ok());
add_op = coll1->add(R"({
"name": "john abraham"
})"_json.dump());
ASSERT_TRUE(add_op.ok());
// first do keyword search
auto results = coll1->search("john", {"name"},
"", {}, {}, {2}, 10,
1, FREQUENCY, {true},
0, spp::sparse_hash_set<std::string>()).get();
ASSERT_EQ(4, results["hits"].size());
// now do hybrid search with sort_by: _text_match:desc,_vector_distance:asc
std::vector<sort_by> sort_by_list = {{"_text_match", "desc"}, {"_vector_distance", "asc"}};
auto hybrid_results = coll1->search("john", {"name", "embedding"},
"", {}, sort_by_list, {2}, 10,
1, FREQUENCY, {true},
0, spp::sparse_hash_set<std::string>()).get();
// first 4 results should be same as keyword search
ASSERT_EQ(results["hits"][0]["document"]["name"].get<std::string>(), hybrid_results["hits"][0]["document"]["name"].get<std::string>());
ASSERT_EQ(results["hits"][1]["document"]["name"].get<std::string>(), hybrid_results["hits"][1]["document"]["name"].get<std::string>());
ASSERT_EQ(results["hits"][2]["document"]["name"].get<std::string>(), hybrid_results["hits"][2]["document"]["name"].get<std::string>());
ASSERT_EQ(results["hits"][3]["document"]["name"].get<std::string>(), hybrid_results["hits"][3]["document"]["name"].get<std::string>());
}
// Two OpenAI model configs that differ only in api_key must be registered as
// two distinct embedder entries keyed by "<model>:<api_key>", with no
// key-less entry. Skips (returns early) unless api_key_1/api_key_2 env vars
// are set.
TEST_F(CollectionVectorTest, TestDifferentOpenAIApiKeys) {
if (std::getenv("api_key_1") == nullptr || std::getenv("api_key_2") == nullptr) {
LOG(INFO) << "Skipping test as api_key_1 or api_key_2 is not set";
return;
}
auto api_key1 = std::string(std::getenv("api_key_1"));
auto api_key2 = std::string(std::getenv("api_key_2"));
auto embedder_map = TextEmbedderManager::get_instance()._get_text_embedders();
// no embedders registered yet for either key
ASSERT_EQ(embedder_map.find("openai/text-embedding-ada-002:" + api_key1), embedder_map.end());
ASSERT_EQ(embedder_map.find("openai/text-embedding-ada-002:" + api_key2), embedder_map.end());
ASSERT_EQ(embedder_map.find("openai/text-embedding-ada-002"), embedder_map.end());
nlohmann::json model_config1 = R"({
"model_name": "openai/text-embedding-ada-002"
})"_json;
nlohmann::json model_config2 = model_config1;
model_config1["api_key"] = api_key1;
model_config2["api_key"] = api_key2;
size_t num_dim;
// NOTE(review): return values of validate_and_init_remote_model are not
// asserted here — consider checking .ok() to fail fast on network errors
TextEmbedderManager::get_instance().validate_and_init_remote_model(model_config1, num_dim);
TextEmbedderManager::get_instance().validate_and_init_remote_model(model_config2, num_dim);
embedder_map = TextEmbedderManager::get_instance()._get_text_embedders();
ASSERT_NE(embedder_map.find("openai/text-embedding-ada-002:" + api_key1), embedder_map.end());
ASSERT_NE(embedder_map.find("openai/text-embedding-ada-002:" + api_key2), embedder_map.end());
ASSERT_EQ(embedder_map.find("openai/text-embedding-ada-002"), embedder_map.end());
}
// Smoke test for the ts/multilingual-e5-small model: indexing a document and
// running both hybrid (keyword + embedding) and pure semantic searches must
// succeed.
TEST_F(CollectionVectorTest, TestMultilingualE5) {
auto schema_json =
R"({
"name": "TEST",
"fields": [
{"name": "name", "type": "string"},
{"name": "embedding", "type":"float[]", "embed":{"from": ["name"], "model_config": {"model_name": "ts/multilingual-e5-small"}}}
]
})"_json;
TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
auto collection_create_op = collectionManager.create_collection(schema_json);
ASSERT_TRUE(collection_create_op.ok());
auto coll1 = collection_create_op.get();
auto add_op = coll1->add(R"({
"name": "john doe"
})"_json.dump());
// fix: add_op was previously stored but never checked; a failed indexing
// would silently pass the test
ASSERT_TRUE(add_op.ok());
auto hybrid_results = coll1->search("john", {"name", "embedding"},
"", {}, {}, {2}, 10,
1, FREQUENCY, {true},
0, spp::sparse_hash_set<std::string>());
ASSERT_TRUE(hybrid_results.ok());
auto semantic_results = coll1->search("john", {"embedding"},
"", {}, {}, {2}, 10,
1, FREQUENCY, {true},
0, spp::sparse_hash_set<std::string>());
ASSERT_TRUE(semantic_results.ok());
}

View File

@ -610,7 +610,7 @@ TEST_F(CoreAPIUtilsTest, MultiSearchWithPresetShouldUsePresetForAuth) {
ASSERT_EQ(2, embedded_params_vec.size());
}
TEST_F(CoreAPIUtilsTest, PresetSingleSearch) {
TEST_F(CoreAPIUtilsTest, PresetMultiSearch) {
nlohmann::json schema = R"({
"name": "coll1",
"fields": [
@ -634,7 +634,7 @@ TEST_F(CoreAPIUtilsTest, PresetSingleSearch) {
auto search_body = R"(
{"searches":[
{"collection":"coll1","q":"apple", "query_by": "title", "preset": "single_preset"}
{"collection":"coll1","q":"apple", "query_by": "name", "preset": "single_preset"}
]}
)";
@ -644,8 +644,40 @@ TEST_F(CoreAPIUtilsTest, PresetSingleSearch) {
post_multi_search(req, res);
ASSERT_EQ("12", req->params["per_page"]);
ASSERT_EQ("coll1", req->params["collection"]);
auto res_json = nlohmann::json::parse(res->body);
ASSERT_EQ(1, res_json["results"].size());
ASSERT_EQ(0, res_json["results"][0]["found"].get<size_t>());
// with multiple "searches" preset configuration
preset_value = R"(
{"searches":[
{"collection":"coll1", "q": "*", "per_page": "8"},
{"collection":"coll1", "q": "*", "per_page": "11"}
]}
)"_json;
collectionManager.upsert_preset("multi_preset", preset_value);
embedded_params.clear();
req->params.clear();
req->params["preset"] = "multi_preset";
req->embedded_params_vec.clear();
req->embedded_params_vec.push_back(embedded_params);
req->embedded_params_vec.push_back(embedded_params);
// "preset": "multi_preset"
search_body = R"(
{"searches":[
{"collection":"coll1","q":"apple", "query_by": "title"}
]}
)";
req->body = search_body;
post_multi_search(req, res);
res_json = nlohmann::json::parse(res->body);
ASSERT_EQ(2, res_json["results"].size());
ASSERT_EQ(0, res_json["results"][0]["found"].get<size_t>());
ASSERT_EQ(0, res_json["results"][1]["found"].get<size_t>());
collectionManager.drop_collection("coll1");
}