Merge branch 'v0.25-join' of https://github.com/ozanarmagan/typesense into v0.25-join

This commit is contained in:
ozanarmagan 2023-07-08 13:28:49 +03:00
commit 222c50e2c1
24 changed files with 563 additions and 301 deletions

View File

@ -24,10 +24,11 @@ private:
void to_json(nlohmann::json& obj) const {
obj["name"] = name;
obj["type"] = POPULAR_QUERIES_TYPE;
obj["params"] = nlohmann::json::object();
obj["params"]["suggestion_collection"] = suggestion_collection;
obj["params"]["query_collections"] = query_collections;
obj["params"]["limit"] = limit;
obj["params"]["source"]["collections"] = query_collections;
obj["params"]["destination"]["collection"] = suggestion_collection;
}
};
@ -48,7 +49,9 @@ private:
Option<bool> remove_popular_queries_index(const std::string& name);
Option<bool> create_popular_queries_index(nlohmann::json &payload, bool write_to_disk);
Option<bool> create_popular_queries_index(nlohmann::json &payload,
bool upsert,
bool write_to_disk);
public:
@ -69,12 +72,14 @@ public:
Option<nlohmann::json> list_rules();
Option<bool> create_rule(nlohmann::json& payload, bool write_to_disk = true);
Option<nlohmann::json> get_rule(const std::string& name);
Option<bool> create_rule(nlohmann::json& payload, bool upsert, bool write_to_disk);
Option<bool> remove_rule(const std::string& name);
void add_suggestion(const std::string& query_collection,
std::string& query, const bool live_query, const std::string& user_id);
std::string& query, bool live_query, const std::string& user_id);
void stop();
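For context, the updated to_json() above nests the collections under source/destination instead of the flat suggestion_collection/query_collections keys, so a serialized rule now takes this shape (illustrative values, mirroring the tests further down in this diff):

{
  "name": "top_search_queries",
  "type": "popular_queries",
  "params": {
    "limit": 100,
    "source": { "collections": ["titles"] },
    "destination": { "collection": "top_queries" }
  }
}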

View File

@ -463,8 +463,10 @@ public:
const text_match_type_t match_type = max_score,
const size_t facet_sample_percent = 100,
const size_t facet_sample_threshold = 0,
const size_t page_offset = UINT32_MAX,
const size_t vector_query_hits = 250) const;
const size_t page_offset = 0,
const size_t vector_query_hits = 250,
const size_t remote_embedding_timeout_ms = 30000,
const size_t remote_embedding_num_try = 2) const;
Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;

View File

@ -147,8 +147,12 @@ bool post_create_event(const std::shared_ptr<http_req>& req, const std::shared_p
bool get_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res);
bool get_analytics_rule(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res);
bool post_create_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res);
bool put_upsert_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res);
bool del_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res);
// Misc helpers

View File

@ -205,6 +205,49 @@ public:
virtual ~req_state_t() = default;
};
struct stream_response_state_t {
private:
h2o_req_t* req = nullptr;
public:
bool is_req_early_exit = false;
bool is_res_start = true;
h2o_send_state_t send_state = H2O_SEND_STATE_IN_PROGRESS;
std::string res_body;
h2o_iovec_t res_buff;
std::string res_content_type;
int status = 0;
const char* reason = nullptr;
h2o_generator_t* generator = nullptr;
void set_response(uint32_t status_code, const std::string& content_type, std::string& body) {
std::string().swap(res_body);
res_body = std::move(body);
res_buff = h2o_iovec_t{.base = res_body.data(), .len = res_body.size()};
if(is_res_start) {
res_content_type = std::move(content_type);
status = (int)status_code;
reason = http_res::get_status_reason(status_code);
is_res_start = false;
}
}
void set_req(h2o_req_t* _req) {
req = _req;
}
h2o_req_t* get_req() {
return req;
}
};
struct http_req {
static constexpr const char* AUTH_HEADER = "x-typesense-api-key";
static constexpr const char* USER_HEADER = "x-typesense-user-id";
@ -248,6 +291,9 @@ struct http_req {
std::atomic<bool> is_diposed;
std::string client_ip = "0.0.0.0";
// stores http lib related data structures to avoid race conditions between indexing and http write threads
stream_response_state_t res_state;
http_req(): _req(nullptr), route_hash(1),
first_chunk_aggregate(true), last_chunk_aggregate(false),
chunk_len(0), body_index(0), data(nullptr), ready(false), log_index(0),

View File

@ -32,11 +32,13 @@ class HttpProxy {
void operator=(const HttpProxy&) = delete;
HttpProxy(HttpProxy&&) = delete;
void operator=(HttpProxy&&) = delete;
http_proxy_res_t send(const std::string& url, const std::string& method, const std::string& body, const std::unordered_map<std::string, std::string>& headers);
http_proxy_res_t send(const std::string& url, const std::string& method, const std::string& body, std::unordered_map<std::string, std::string>& headers);
private:
HttpProxy();
~HttpProxy() = default;
http_proxy_res_t call(const std::string& url, const std::string& method, const std::string& body = "", const std::unordered_map<std::string, std::string>& headers = {});
http_proxy_res_t call(const std::string& url, const std::string& method,
const std::string& body = "", const std::unordered_map<std::string, std::string>& headers = {},
const size_t timeout_ms = 30000);
// lru cache for http requests

View File

@ -40,50 +40,6 @@ struct h2o_custom_generator_t {
}
};
struct stream_response_state_t {
private:
h2o_req_t* req = nullptr;
public:
bool is_req_early_exit = false;
bool is_res_start = true;
h2o_send_state_t send_state = H2O_SEND_STATE_IN_PROGRESS;
std::string res_body;
h2o_iovec_t res_buff;
h2o_iovec_t res_content_type{};
int status = 0;
const char* reason = nullptr;
h2o_generator_t* generator = nullptr;
explicit stream_response_state_t(h2o_req_t* _req): req(_req) {
if(req != nullptr) {
is_res_start = (req->res.status == 0);
}
}
void set_response(uint32_t status_code, const std::string& content_type, std::string& body) {
std::string().swap(res_body);
res_body = std::move(body);
res_buff = h2o_iovec_t{.base = res_body.data(), .len = res_body.size()};
if(is_res_start) {
res_content_type = h2o_strdup(&req->pool, content_type.c_str(), SIZE_MAX);
status = status_code;
reason = http_res::get_status_reason(status_code);
}
}
h2o_req_t* get_req() {
return req;
}
};
struct deferred_req_res_t {
const std::shared_ptr<http_req> req;
const std::shared_ptr<http_res> res;
@ -110,13 +66,9 @@ public:
// used to manage lifecycle of async actions
const bool destroy_after_use;
// stores http lib related data structures to avoid race conditions between indexing and http write threads
stream_response_state_t res_state;
async_req_res_t(const std::shared_ptr<http_req>& h_req, const std::shared_ptr<http_res>& h_res,
const bool destroy_after_use) :
req(h_req), res(h_res), destroy_after_use(destroy_after_use),
res_state((std::shared_lock(res->mres), h_req->is_diposed ? nullptr : h_req->_req)) {
req(h_req), res(h_res), destroy_after_use(destroy_after_use) {
std::shared_lock lk(res->mres);
@ -124,12 +76,10 @@ public:
return;
}
// ***IMPORTANT***
// We limit writing to fields of `res_state.req` to prevent race conditions with http thread
// Check `HttpServer::stream_response()` for overlapping writes.
h2o_custom_generator_t* res_generator = static_cast<h2o_custom_generator_t*>(res->generator.load());
auto& res_state = req->res_state;
res_state.set_req(h_req->is_diposed ? nullptr : h_req->_req);
res_state.is_req_early_exit = (res_generator->rpath->async_req && res->final && !req->last_chunk_aggregate);
res_state.send_state = res->final ? H2O_SEND_STATE_FINAL : H2O_SEND_STATE_IN_PROGRESS;
res_state.generator = (res_generator == nullptr) ? nullptr : &res_generator->h2o_generator;
@ -147,6 +97,10 @@ public:
void res_notify() {
return res->notify();
}
stream_response_state_t& get_res_state() {
return req->res_state;
}
};
struct defer_processing_t {

View File

@ -150,7 +150,7 @@ struct search_args {
filter_node_t* filter_tree_root, std::vector<facet>& facets,
std::vector<std::pair<uint32_t, uint32_t>>& included_ids, std::vector<uint32_t> excluded_ids,
std::vector<sort_by>& sort_fields_std, facet_query_t facet_query, const std::vector<uint32_t>& num_typos,
size_t max_facet_values, size_t max_hits, size_t per_page, size_t page, token_ordering token_order,
size_t max_facet_values, size_t max_hits, size_t per_page, size_t offset, token_ordering token_order,
const std::vector<bool>& prefixes, size_t drop_tokens_threshold, size_t typo_tokens_threshold,
const std::vector<std::string>& group_by_fields, size_t group_limit,
const string& default_sorting_field, bool prioritize_exact_match,

View File

@ -16,8 +16,8 @@ class TextEmbedder {
// Constructor for remote models
TextEmbedder(const nlohmann::json& model_config);
~TextEmbedder();
embedding_res_t Embed(const std::string& text);
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 0);
embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_try = 2);
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200);
const std::string& get_vocab_file_name() const;
bool is_remote() {
return remote_embedder_ != nullptr;
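A minimal usage sketch of the widened Embed() signature, assuming `embedder` points to a TextEmbedder built for a remote model (the setup is hypothetical; the defaults match this header):

// hypothetical setup: TextEmbedder* embedder = ...;
auto op = embedder->Embed("running shoes",
                          5000 /* remote_embedder_timeout_ms */,
                          3    /* remote_embedding_num_try */);
if(!op.success) {
    // op.error carries the request/response context assembled by get_error_json()
}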

View File

@ -28,7 +28,8 @@ class RemoteEmbedder {
static long call_remote_api(const std::string& method, const std::string& url, const std::string& body, std::string& res_body, std::map<std::string, std::string>& headers, const std::unordered_map<std::string, std::string>& req_headers);
static inline ReplicationState* raft_server = nullptr;
public:
virtual embedding_res_t Embed(const std::string& text) = 0;
virtual nlohmann::json get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) = 0;
virtual embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_try = 2) = 0;
virtual std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200) = 0;
static void init(ReplicationState* rs) {
raft_server = rs;
@ -47,8 +48,9 @@ class OpenAIEmbedder : public RemoteEmbedder {
public:
OpenAIEmbedder(const std::string& openai_model_path, const std::string& api_key);
static Option<bool> is_model_valid(const nlohmann::json& model_config, unsigned int& num_dims);
embedding_res_t Embed(const std::string& text) override;
embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_try = 2) override;
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200) override;
nlohmann::json get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) override;
};
@ -59,11 +61,13 @@ class GoogleEmbedder : public RemoteEmbedder {
inline static constexpr short GOOGLE_EMBEDDING_DIM = 768;
inline static constexpr char* GOOGLE_CREATE_EMBEDDING = "https://generativelanguage.googleapis.com/v1beta2/models/embedding-gecko-001:embedText?key=";
std::string google_api_key;
public:
GoogleEmbedder(const std::string& google_api_key);
static Option<bool> is_model_valid(const nlohmann::json& model_config, unsigned int& num_dims);
embedding_res_t Embed(const std::string& text) override;
embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_try = 2) override;
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200) override;
nlohmann::json get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) override;
};
@ -88,8 +92,9 @@ class GCPEmbedder : public RemoteEmbedder {
GCPEmbedder(const std::string& project_id, const std::string& model_name, const std::string& access_token,
const std::string& refresh_token, const std::string& client_id, const std::string& client_secret);
static Option<bool> is_model_valid(const nlohmann::json& model_config, unsigned int& num_dims);
embedding_res_t Embed(const std::string& text) override;
embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_try = 2) override;
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200) override;
nlohmann::json get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) override;
};

View File

@ -5,7 +5,7 @@
#include "http_client.h"
#include "collection_manager.h"
Option<bool> AnalyticsManager::create_rule(nlohmann::json& payload, bool write_to_disk) {
Option<bool> AnalyticsManager::create_rule(nlohmann::json& payload, bool upsert, bool write_to_disk) {
/*
Sample payload:
@ -37,16 +37,23 @@ Option<bool> AnalyticsManager::create_rule(nlohmann::json& payload, bool write_t
}
if(payload["type"] == POPULAR_QUERIES_TYPE) {
return create_popular_queries_index(payload, write_to_disk);
return create_popular_queries_index(payload, upsert, write_to_disk);
}
return Option<bool>(400, "Invalid type.");
}
Option<bool> AnalyticsManager::create_popular_queries_index(nlohmann::json &payload, bool write_to_disk) {
Option<bool> AnalyticsManager::create_popular_queries_index(nlohmann::json &payload, bool upsert, bool write_to_disk) {
// params and name are validated upstream
const auto& params = payload["params"];
const std::string& suggestion_config_name = payload["name"].get<std::string>();
bool already_exists = suggestion_configs.find(suggestion_config_name) != suggestion_configs.end();
if(!upsert && already_exists) {
return Option<bool>(400, "There's already another configuration with the name `" +
suggestion_config_name + "`.");
}
const auto& params = payload["params"];
if(!params.contains("source") || !params["source"].is_object()) {
return Option<bool>(400, "Bad or missing source.");
@ -56,18 +63,12 @@ Option<bool> AnalyticsManager::create_popular_queries_index(nlohmann::json &payl
return Option<bool>(400, "Bad or missing destination.");
}
size_t limit = 1000;
if(params.contains("limit") && params["limit"].is_number_integer()) {
limit = params["limit"].get<size_t>();
}
if(suggestion_configs.find(suggestion_config_name) != suggestion_configs.end()) {
return Option<bool>(400, "There's already another configuration with the name `" +
suggestion_config_name + "`.");
}
if(!params["source"].contains("collections") || !params["source"]["collections"].is_array()) {
return Option<bool>(400, "Must contain a valid list of source collections.");
}
@ -93,6 +94,14 @@ Option<bool> AnalyticsManager::create_popular_queries_index(nlohmann::json &payl
std::unique_lock lock(mutex);
if(already_exists) {
// remove the previous configuration with the same name (upsert)
Option<bool> remove_op = remove_popular_queries_index(suggestion_config_name);
if(!remove_op.ok()) {
return Option<bool>(500, "Error erasing the existing configuration.");
}
}
suggestion_configs.emplace(suggestion_config_name, suggestion_config);
for(const auto& query_coll: suggestion_config.query_collections) {
@ -130,13 +139,25 @@ Option<nlohmann::json> AnalyticsManager::list_rules() {
for(const auto& suggestion_config: suggestion_configs) {
nlohmann::json rule;
suggestion_config.second.to_json(rule);
rule["type"] = POPULAR_QUERIES_TYPE;
rules["rules"].push_back(rule);
}
return Option<nlohmann::json>(rules);
}
Option<nlohmann::json> AnalyticsManager::get_rule(const string& name) {
nlohmann::json rule;
std::unique_lock lock(mutex);
auto suggestion_config_it = suggestion_configs.find(name);
if(suggestion_config_it == suggestion_configs.end()) {
return Option<nlohmann::json>(404, "Rule not found.");
}
suggestion_config_it->second.to_json(rule);
return Option<nlohmann::json>(rule);
}
Option<bool> AnalyticsManager::remove_rule(const string &name) {
std::unique_lock lock(mutex);
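Taken together, the changes above give create_rule() these semantics; a sketch, assuming `rule_json` is a payload shaped like the example near the top of this diff:

auto& mgr = AnalyticsManager::get_instance();
auto create_op = mgr.create_rule(rule_json, false, true); // upsert=false: 400 if the name already exists
auto upsert_op = mgr.create_rule(rule_json, true, true);  // upsert=true: removes and re-creates the rule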

View File

@ -1108,7 +1108,9 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
const size_t facet_sample_percent,
const size_t facet_sample_threshold,
const size_t page_offset,
const size_t vector_query_hits) const {
const size_t vector_query_hits,
const size_t remote_embedding_timeout_ms,
const size_t remote_embedding_num_try) const {
std::shared_lock lock(mutex);
@ -1234,10 +1236,15 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
std::string error = "Prefix search is not supported for remote embedders. Please set `prefix=false` as an additional search parameter to disable prefix searching.";
return Option<nlohmann::json>(400, error);
}
if(remote_embedding_num_try == 0) {
std::string error = "`remote_embedding_num_try` must be greater than 0.";
return Option<nlohmann::json>(400, error);
}
}
std::string embed_query = embedder_manager.get_query_prefix(search_field.embed[fields::model_config]) + raw_query;
auto embedding_op = embedder->Embed(embed_query);
auto embedding_op = embedder->Embed(embed_query, remote_embedding_timeout_ms, remote_embedding_num_try);
if(!embedding_op.success) {
if(!embedding_op.error["error"].get<std::string>().empty()) {
return Option<nlohmann::json>(400, embedding_op.error["error"].get<std::string>());
@ -1393,7 +1400,17 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
return Option<nlohmann::json>(422, message);
}
size_t offset = (page != 0) ? (per_page * (page - 1)) : page_offset;
size_t offset = 0;
if(page == 0 && page_offset != 0) {
// if only offset is set, use that
offset = page_offset;
} else {
// if both are set or none set, use page value (default is 1)
size_t actual_page = (page == 0) ? 1 : page;
offset = (per_page * (actual_page - 1));
}
size_t fetch_size = offset + per_page;
if(fetch_size > limit_hits) {

View File

@ -285,6 +285,17 @@ Option<bool> CollectionManager::load(const size_t collection_batch_size, const s
iter->Next();
}
// restore query suggestions configs
std::vector<std::string> analytics_config_jsons;
store->scan_fill(AnalyticsManager::ANALYTICS_RULE_PREFIX,
std::string(AnalyticsManager::ANALYTICS_RULE_PREFIX) + "`",
analytics_config_jsons);
for(const auto& analytics_config_json: analytics_config_jsons) {
nlohmann::json analytics_config = nlohmann::json::parse(analytics_config_json);
AnalyticsManager::get_instance().create_rule(analytics_config, false, false);
}
delete iter;
LOG(INFO) << "Loaded " << num_collections << " collection(s).";
@ -671,6 +682,9 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
const char *VECTOR_QUERY = "vector_query";
const char *VECTOR_QUERY_HITS = "vector_query_hits";
const char* REMOTE_EMBEDDING_TIMEOUT_MS = "remote_embedding_timeout_ms";
const char* REMOTE_EMBEDDING_NUM_TRY = "remote_embedding_num_try";
const char *GROUP_BY = "group_by";
const char *GROUP_LIMIT = "group_limit";
@ -783,7 +797,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
std::vector<sort_by> sort_fields;
size_t per_page = 10;
size_t page = 0;
size_t offset = UINT32_MAX;
size_t offset = 0;
token_ordering token_order = NOT_SET;
std::string vector_query;
@ -824,6 +838,9 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
text_match_type_t match_type = max_score;
size_t vector_query_hits = 250;
size_t remote_embedding_timeout_ms = 30000;
size_t remote_embedding_num_try = 2;
size_t facet_sample_percent = 100;
size_t facet_sample_threshold = 0;
@ -850,6 +867,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
{FACET_SAMPLE_PERCENT, &facet_sample_percent},
{FACET_SAMPLE_THRESHOLD, &facet_sample_threshold},
{VECTOR_QUERY_HITS, &vector_query_hits},
{REMOTE_EMBEDDING_TIMEOUT_MS, &remote_embedding_timeout_ms},
{REMOTE_EMBEDDING_NUM_TRY, &remote_embedding_num_try},
};
std::unordered_map<std::string, std::string*> str_values = {
@ -978,14 +997,6 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
per_page = 0;
}
if(!req_params[PAGE].empty() && page == 0 && offset == UINT32_MAX) {
return Option<bool>(422, "Parameter `page` must be an integer of value greater than 0.");
}
if(req_params[PAGE].empty() && req_params[OFFSET].empty()) {
page = 1;
}
include_fields.insert(include_fields_vec.begin(), include_fields_vec.end());
exclude_fields.insert(exclude_fields_vec.begin(), exclude_fields_vec.end());
@ -1070,7 +1081,10 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
match_type,
facet_sample_percent,
facet_sample_threshold,
offset
offset,
vector_query_hits,
remote_embedding_timeout_ms,
remote_embedding_num_try
);
uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
@ -1097,10 +1111,10 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
result["search_time_ms"] = timeMillis;
}
if(page != 0) {
result["page"] = page;
} else {
if(page == 0 && offset != 0) {
result["offset"] = offset;
} else {
result["page"] = (page == 0) ? 1 : page;
}
results_json_str = result.dump(-1, ' ', false, nlohmann::detail::error_handler_t::ignore);
@ -1320,17 +1334,6 @@ Option<bool> CollectionManager::load_collection(const nlohmann::json &collection
collection->add_synonym(collection_synonym, false);
}
// restore query suggestions configs
std::vector<std::string> analytics_config_jsons;
cm.store->scan_fill(AnalyticsManager::ANALYTICS_RULE_PREFIX,
std::string(AnalyticsManager::ANALYTICS_RULE_PREFIX) + "`",
analytics_config_jsons);
for(const auto& analytics_config_json: analytics_config_jsons) {
nlohmann::json analytics_config = nlohmann::json::parse(analytics_config_json);
AnalyticsManager::get_instance().create_rule(analytics_config, false);
}
// Fetch records from the store and re-create memory index
const std::string seq_id_prefix = collection->get_seq_id_collection_prefix();
std::string upper_bound_key = collection->get_seq_id_collection_prefix() + "`"; // cannot inline this
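With the two new knobs registered in do_search(), remote embedding behavior can be tuned per request; a hedged example of the incoming parameters (collection and field names are hypothetical):

std::map<std::string, std::string> req_params = {
    {"collection", "products"},
    {"q", "running shoes"},
    {"query_by", "embedding"},
    {"remote_embedding_timeout_ms", "5000"},
    {"remote_embedding_num_try", "3"},
    {"offset", "30"}  // with `page` absent, the response echoes "offset": 30
};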

View File

@ -2091,13 +2091,25 @@ bool post_create_event(const std::shared_ptr<http_req>& req, const std::shared_p
bool get_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
auto rules_op = AnalyticsManager::get_instance().list_rules();
if(rules_op.ok()) {
res->set_200(rules_op.get().dump());
return true;
if(!rules_op.ok()) {
res->set(rules_op.code(), rules_op.error());
return false;
}
res->set(rules_op.code(), rules_op.error());
return false;
res->set_200(rules_op.get().dump());
return true;
}
bool get_analytics_rule(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
auto rules_op = AnalyticsManager::get_instance().get_rule(req->params["name"]);
if(!rules_op.ok()) {
res->set(rules_op.code(), rules_op.error());
return false;
}
res->set_200(rules_op.get().dump());
return true;
}
bool post_create_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
@ -2111,7 +2123,7 @@ bool post_create_analytics_rules(const std::shared_ptr<http_req>& req, const std
return false;
}
auto op = AnalyticsManager::get_instance().create_rule(req_json);
auto op = AnalyticsManager::get_instance().create_rule(req_json, false, true);
if(!op.ok()) {
res->set(op.code(), op.error());
@ -2122,6 +2134,29 @@ bool post_create_analytics_rules(const std::shared_ptr<http_req>& req, const std
return true;
}
bool put_upsert_analytics_rules(const std::shared_ptr<http_req> &req, const std::shared_ptr<http_res> &res) {
nlohmann::json req_json;
try {
req_json = nlohmann::json::parse(req->body);
} catch(const std::exception& e) {
LOG(ERROR) << "JSON error: " << e.what();
res->set_400("Bad JSON.");
return false;
}
req_json["name"] = req->params["name"];
auto op = AnalyticsManager::get_instance().create_rule(req_json, true, true);
if(!op.ok()) {
res->set(op.code(), op.error());
return false;
}
res->set_200(req_json.dump());
return true;
}
bool del_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
auto op = AnalyticsManager::get_instance().remove_rule(req->params["name"]);
if(!op.ok()) {
@ -2129,11 +2164,10 @@ bool del_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared
return false;
}
res->set_200(R"({"ok": true)");
res->set_200(R"({"ok": true})");
return true;
}
bool post_proxy(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
HttpProxy& proxy = HttpProxy::get_instance();
@ -2193,4 +2227,4 @@ bool post_proxy(const std::shared_ptr<http_req>& req, const std::shared_ptr<http
res->set_200(response.body);
return true;
}
}

View File

@ -8,17 +8,19 @@ HttpProxy::HttpProxy() : cache(30s){
}
http_proxy_res_t HttpProxy::call(const std::string& url, const std::string& method, const std::string& body, const std::unordered_map<std::string, std::string>& headers) {
http_proxy_res_t HttpProxy::call(const std::string& url, const std::string& method,
const std::string& body, const std::unordered_map<std::string, std::string>& headers,
const size_t timeout_ms) {
HttpClient& client = HttpClient::get_instance();
http_proxy_res_t res;
if(method == "GET") {
res.status_code = client.get_response(url, res.body, res.headers, headers, 20 * 1000);
res.status_code = client.get_response(url, res.body, res.headers, headers, timeout_ms);
} else if(method == "POST") {
res.status_code = client.post_response(url, body, res.body, res.headers, headers, 20 * 1000);
res.status_code = client.post_response(url, body, res.body, res.headers, headers, timeout_ms);
} else if(method == "PUT") {
res.status_code = client.put_response(url, body, res.body, res.headers, 20 * 1000);
res.status_code = client.put_response(url, body, res.body, res.headers, timeout_ms);
} else if(method == "DELETE") {
res.status_code = client.delete_response(url, res.body, res.headers, 20 * 1000);
res.status_code = client.delete_response(url, res.body, res.headers, timeout_ms);
} else {
res.status_code = 400;
nlohmann::json j;
@ -29,11 +31,25 @@ http_proxy_res_t HttpProxy::call(const std::string& url, const std::string& meth
}
http_proxy_res_t HttpProxy::send(const std::string& url, const std::string& method, const std::string& body, const std::unordered_map<std::string, std::string>& headers) {
http_proxy_res_t HttpProxy::send(const std::string& url, const std::string& method, const std::string& body, std::unordered_map<std::string, std::string>& headers) {
// check if url is in cache
uint64_t key = StringUtils::hash_wy(url.c_str(), url.size());
key = StringUtils::hash_combine(key, StringUtils::hash_wy(method.c_str(), method.size()));
key = StringUtils::hash_combine(key, StringUtils::hash_wy(body.c_str(), body.size()));
size_t timeout_ms = 30000;
size_t num_try = 2;
if(headers.find("timeout_ms") != headers.end()){
timeout_ms = std::stoul(headers.at("timeout_ms"));
headers.erase("timeout_ms");
}
if(headers.find("num_try") != headers.end()){
num_try = std::stoul(headers.at("num_try"));
headers.erase("num_try");
}
for(auto& header : headers){
key = StringUtils::hash_combine(key, StringUtils::hash_wy(header.first.c_str(), header.first.size()));
key = StringUtils::hash_combine(key, StringUtils::hash_wy(header.second.c_str(), header.second.size()));
@ -42,11 +58,13 @@ http_proxy_res_t HttpProxy::send(const std::string& url, const std::string& meth
return cache[key];
}
auto res = call(url, method, body, headers);
http_proxy_res_t res;
for(size_t i = 0; i < num_try; i++){
res = call(url, method, body, headers, timeout_ms);
if(res.status_code == 408){
// retry
res = call(url, method, body, headers);
if(res.status_code != 408 && res.status_code < 500){
break;
}
}
if(res.status_code == 408){
@ -54,10 +72,9 @@ http_proxy_res_t HttpProxy::send(const std::string& url, const std::string& meth
j["message"] = "Server error on remote server. Please try again later.";
res.body = j.dump();
}
// add to cache
if(res.status_code != 408){
if(res.status_code == 200){
cache.insert(key, res);
}
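The timeout_ms/num_try entries are consumed and stripped by send() before the remaining headers are forwarded; a usage sketch under that assumption (the URL is illustrative):

std::unordered_map<std::string, std::string> headers;
headers["timeout_ms"] = "5000";  // per-request timeout handed to HttpClient
headers["num_try"] = "3";        // retried while the status is 408 or >= 500
auto res = HttpProxy::get_instance().send("https://example.org/embed", "POST", "{}", headers);
// `headers` no longer contains timeout_ms/num_try here, and only 200 responses are cached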

View File

@ -808,9 +808,11 @@ void HttpServer::stream_response(stream_response_state_t& state) {
h2o_req_t* req = state.get_req();
if(state.is_res_start) {
bool start_of_res = (req->res.status == 0);
if(start_of_res) {
h2o_add_header(&req->pool, &req->res.headers, H2O_TOKEN_CONTENT_TYPE, NULL,
state.res_content_type.base, state.res_content_type.len);
state.res_content_type.data(), state.res_content_type.size());
req->res.status = state.status;
req->res.reason = state.reason;
}
@ -829,7 +831,7 @@ void HttpServer::stream_response(stream_response_state_t& state) {
return ;
}
if (state.is_res_start) {
if (start_of_res) {
/*LOG(INFO) << "h2o_start_response, content_type=" << state.res_content_type
<< ",response.status_code=" << state.res_status_code;*/
h2o_start_response(req, state.generator);
@ -969,7 +971,7 @@ bool HttpServer::on_stream_response_message(void *data) {
// NOTE: access to `req` and `res` objects must be synchronized and wrapped by `req_res`
if(req_res->is_alive()) {
stream_response(req_res->res_state);
stream_response(req_res->get_res_state());
} else {
// serialized request or generator has been disposed (underlying request is probably dead)
req_res->req_notify();

View File

@ -6268,7 +6268,9 @@ void Index::get_doc_changes(const index_operation_t op, const tsl::htrie_map<cha
if(it.value().is_null()) {
// null values should not be indexed
new_doc.erase(it.key());
del_doc[it.key()] = old_doc[it.key()];
if(old_doc.contains(it.key())) {
del_doc[it.key()] = old_doc[it.key()];
}
it = update_doc.erase(it);
continue;
}
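The contains() guard matters for the case exercised by the new test further down: an update that sets null on a field the stored document never had. A minimal repro of the inputs (hypothetical document):

nlohmann::json old_doc = R"({"id": "2"})"_json;               // "tags" was never set
nlohmann::json update  = R"({"id": "2", "tags": null})"_json; // null on a missing field
// without the guard, old_doc["tags"] would materialize a null entry in del_doc,
// scheduling removal of a value that was never indexed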

View File

@ -70,7 +70,9 @@ void master_server_routes() {
// analytics
server->get("/analytics/rules", get_analytics_rules);
server->get("/analytics/rules/:name", get_analytics_rule);
server->post("/analytics/rules", post_create_analytics_rules);
server->put("/analytics/rules/:name", put_upsert_analytics_rules);
server->del("/analytics/rules/:name", del_analytics_rules);
server->post("/analytics/events", post_create_event);

View File

@ -83,9 +83,9 @@ std::vector<float> TextEmbedder::mean_pooling(const std::vector<std::vector<floa
return pooled_output;
}
embedding_res_t TextEmbedder::Embed(const std::string& text) {
embedding_res_t TextEmbedder::Embed(const std::string& text, const size_t remote_embedder_timeout_ms, const size_t remote_embedding_num_try) {
if(is_remote()) {
return remote_embedder_->Embed(text);
return remote_embedder_->Embed(text, remote_embedder_timeout_ms, remote_embedding_num_try);
} else {
// Cannot run same model in parallel, so lock the mutex
std::lock_guard<std::mutex> lock(mutex_);

View File

@ -132,53 +132,27 @@ Option<bool> OpenAIEmbedder::is_model_valid(const nlohmann::json& model_config,
return Option<bool>(true);
}
embedding_res_t OpenAIEmbedder::Embed(const std::string& text) {
embedding_res_t OpenAIEmbedder::Embed(const std::string& text, const size_t remote_embedder_timeout_ms, const size_t remote_embedding_num_try) {
std::unordered_map<std::string, std::string> headers;
std::map<std::string, std::string> res_headers;
headers["Authorization"] = "Bearer " + api_key;
headers["Content-Type"] = "application/json";
headers["timeout_ms"] = std::to_string(remote_embedder_timeout_ms);
headers["num_try"] = std::to_string(remote_embedding_num_try);
std::string res;
nlohmann::json req_body;
req_body["input"] = text;
req_body["input"] = std::vector<std::string>{text};
// remove "openai/" prefix
req_body["model"] = TextEmbedderManager::get_model_name_without_namespace(openai_model_path);
auto res_code = call_remote_api("POST", OPENAI_CREATE_EMBEDDING, req_body.dump(), res, res_headers, headers);
if (res_code != 200) {
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from OpenAI API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = OPENAI_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "OpenAI API error: " + json_res["error"]["message"].get<std::string>();
}
if(res_code == 408) {
embedding_res["error"] = "OpenAI API timeout.";
}
return embedding_res_t(res_code, embedding_res);
return embedding_res_t(res_code, get_error_json(req_body, res_code, res));
}
try {
embedding_res_t embedding_res = embedding_res_t(nlohmann::json::parse(res)["data"][0]["embedding"].get<std::vector<float>>());
return embedding_res;
} catch (const std::exception& e) {
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = OPENAI_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
embedding_res["error"] = "Malformed response from OpenAI API.";
return embedding_res_t(500, embedding_res);
return embedding_res_t(500, get_error_json(req_body, res_code, res));
}
}
@ -206,46 +180,19 @@ std::vector<embedding_res_t> OpenAIEmbedder::batch_embed(const std::vector<std::
if(res_code != 200) {
std::vector<embedding_res_t> outputs;
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from OpenAI API.";
}
LOG(INFO) << "OpenAI API error: " << json_res.dump();
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = OPENAI_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
embedding_res["request"]["body"]["input"] = std::vector<std::string>{inputs[0]};
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "OpenAI API error: " + json_res["error"]["message"].get<std::string>();
}
if(res_code == 408) {
embedding_res["error"] = "OpenAI API timeout.";
}
nlohmann::json embedding_res = get_error_json(req_body, res_code, res);
for(size_t i = 0; i < inputs.size(); i++) {
embedding_res["request"]["body"]["input"][0] = inputs[i];
outputs.push_back(embedding_res_t(res_code, embedding_res));
}
return outputs;
}
nlohmann::json res_json;
try {
res_json = nlohmann::json::parse(res);
} catch (const std::exception& e) {
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = OPENAI_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
embedding_res["request"]["body"]["input"] = std::vector<std::string>{inputs[0]};
embedding_res["error"] = "Malformed response from OpenAI API";
nlohmann::json embedding_res = get_error_json(req_body, res_code, res);
std::vector<embedding_res_t> outputs;
for(size_t i = 0; i < inputs.size(); i++) {
embedding_res["request"]["body"]["input"][0] = inputs[i];
@ -262,6 +209,36 @@ std::vector<embedding_res_t> OpenAIEmbedder::batch_embed(const std::vector<std::
}
nlohmann::json OpenAIEmbedder::get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) {
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res_body);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from OpenAI API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = OPENAI_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(embedding_res["request"]["body"].count("input") > 0 && embedding_res["request"]["body"]["input"].get<std::vector<std::string>>().size() > 1) {
auto vec = embedding_res["request"]["body"]["input"].get<std::vector<std::string>>();
vec.resize(1);
embedding_res["request"]["body"]["input"] = vec;
}
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "OpenAI API error: " + json_res["error"]["message"].get<std::string>();
}
if(res_code == 408) {
embedding_res["error"] = "OpenAI API timeout.";
}
return embedding_res;
}
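For reference, the consolidated get_error_json() above yields a payload of this shape (illustrative; the input list is trimmed to its first element to keep the payload small):

// {
//   "response": { ...parsed upstream body, or {"error": "Malformed response from OpenAI API."} },
//   "request":  { "url": <OPENAI_CREATE_EMBEDDING>, "method": "POST", "body": {"model": "...", "input": ["..."]} },
//   "error":    "OpenAI API error: <message>"  // or "OpenAI API timeout." when res_code == 408
// }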
GoogleEmbedder::GoogleEmbedder(const std::string& google_api_key) : google_api_key(google_api_key) {
}
@ -320,10 +297,12 @@ Option<bool> GoogleEmbedder::is_model_valid(const nlohmann::json& model_config,
return Option<bool>(true);
}
embedding_res_t GoogleEmbedder::Embed(const std::string& text) {
embedding_res_t GoogleEmbedder::Embed(const std::string& text, const size_t remote_embedder_timeout_ms, const size_t remote_embedding_num_try) {
std::unordered_map<std::string, std::string> headers;
std::map<std::string, std::string> res_headers;
headers["Content-Type"] = "application/json";
headers["timeout_ms"] = std::to_string(remote_embedder_timeout_ms);
headers["num_try"] = std::to_string(remote_embedding_num_try);
std::string res;
nlohmann::json req_body;
req_body["text"] = text;
@ -331,37 +310,13 @@ embedding_res_t GoogleEmbedder::Embed(const std::string& text) {
auto res_code = call_remote_api("POST", std::string(GOOGLE_CREATE_EMBEDDING) + google_api_key, req_body.dump(), res, res_headers, headers);
if(res_code != 200) {
nlohmann::json json_res;
try {
nlohmann::json json_res = nlohmann::json::parse(res);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from Google API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = GOOGLE_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "Google API error: " + json_res["error"]["message"].get<std::string>();
}
if(res_code == 408) {
embedding_res["error"] = "Google API timeout.";
}
return embedding_res_t(res_code, embedding_res);
return embedding_res_t(res_code, get_error_json(req_body, res_code, res));
}
try {
return embedding_res_t(nlohmann::json::parse(res)["embedding"]["value"].get<std::vector<float>>());
} catch (const std::exception& e) {
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = GOOGLE_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
embedding_res["error"] = "Malformed response from Google API.";
return embedding_res_t(500, embedding_res);
return embedding_res_t(500, get_error_json(req_body, res_code, res));
}
}
@ -376,6 +331,30 @@ std::vector<embedding_res_t> GoogleEmbedder::batch_embed(const std::vector<std::
return outputs;
}
nlohmann::json GoogleEmbedder::get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) {
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res_body);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from Google API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = GOOGLE_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "Google API error: " + json_res["error"]["message"].get<std::string>();
}
if(res_code == 408) {
embedding_res["error"] = "Google API timeout.";
}
return embedding_res;
}
GCPEmbedder::GCPEmbedder(const std::string& project_id, const std::string& model_name, const std::string& access_token,
const std::string& refresh_token, const std::string& client_id, const std::string& client_secret) :
@ -453,7 +432,7 @@ Option<bool> GCPEmbedder::is_model_valid(const nlohmann::json& model_config, uns
return Option<bool>(true);
}
embedding_res_t GCPEmbedder::Embed(const std::string& text) {
embedding_res_t GCPEmbedder::Embed(const std::string& text, const size_t remote_embedder_timeout_ms, const size_t remote_embedding_num_try) {
nlohmann::json req_body;
req_body["instances"] = nlohmann::json::array();
nlohmann::json instance;
@ -462,6 +441,8 @@ embedding_res_t GCPEmbedder::Embed(const std::string& text) {
std::unordered_map<std::string, std::string> headers;
headers["Authorization"] = "Bearer " + access_token;
headers["Content-Type"] = "application/json";
headers["timeout_ms"] = std::to_string(remote_embedder_timeout_ms);
headers["num_try"] = std::to_string(remote_embedding_num_try);
std::map<std::string, std::string> res_headers;
std::string res;
@ -483,41 +464,13 @@ embedding_res_t GCPEmbedder::Embed(const std::string& text) {
}
if(res_code != 200) {
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from GCP API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = get_gcp_embedding_url(project_id, model_name);
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "GCP API error: " + json_res["error"]["message"].get<std::string>();
} else {
embedding_res["error"] = "Malformed response from GCP API.";
}
if(res_code == 408) {
embedding_res["error"] = "GCP API timeout.";
}
return embedding_res_t(res_code, embedding_res);
return embedding_res_t(res_code, get_error_json(req_body, res_code, res));
}
nlohmann::json res_json;
try {
res_json = nlohmann::json::parse(res);
} catch (const std::exception& e) {
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = get_gcp_embedding_url(project_id, model_name);
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
embedding_res["error"] = "Malformed response from GCP API.";
return embedding_res_t(500, embedding_res);
return embedding_res_t(500, get_error_json(req_body, res_code, res));
}
return embedding_res_t(res_json["predictions"][0]["embeddings"]["values"].get<std::vector<float>>());
}
@ -566,28 +519,7 @@ std::vector<embedding_res_t> GCPEmbedder::batch_embed(const std::vector<std::str
}
if(res_code != 200) {
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from GCP API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = get_gcp_embedding_url(project_id, model_name);
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "GCP API error: " + json_res["error"]["message"].get<std::string>();
} else {
embedding_res["error"] = "Malformed response from GCP API.";
}
if(res_code == 408) {
embedding_res["error"] = "GCP API timeout.";
}
auto embedding_res = get_error_json(req_body, res_code, res);
std::vector<embedding_res_t> outputs;
for(size_t i = 0; i < inputs.size(); i++) {
outputs.push_back(embedding_res_t(res_code, embedding_res));
@ -598,12 +530,7 @@ std::vector<embedding_res_t> GCPEmbedder::batch_embed(const std::vector<std::str
try {
res_json = nlohmann::json::parse(res);
} catch (const std::exception& e) {
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["error"] = "Nalformed response from GCP API.";
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = get_gcp_embedding_url(project_id, model_name);
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
nlohmann::json embedding_res = get_error_json(req_body, res_code, res);
std::vector<embedding_res_t> outputs;
for(size_t i = 0; i < inputs.size(); i++) {
outputs.push_back(embedding_res_t(400, embedding_res));
@ -618,6 +545,34 @@ std::vector<embedding_res_t> GCPEmbedder::batch_embed(const std::vector<std::str
return outputs;
}
nlohmann::json GCPEmbedder::get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) {
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res_body);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from GCP API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = get_gcp_embedding_url(project_id, model_name);
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "GCP API error: " + json_res["error"]["message"].get<std::string>();
} else {
embedding_res["error"] = "Malformed response from GCP API.";
}
if(res_code == 408) {
embedding_res["error"] = "GCP API timeout.";
}
return embedding_res;
}
Option<std::string> GCPEmbedder::generate_access_token(const std::string& refresh_token, const std::string& client_id, const std::string& client_secret) {
std::unordered_map<std::string, std::string> headers;
headers["Content-Type"] = "application/x-www-form-urlencoded";
@ -653,5 +608,3 @@ Option<std::string> GCPEmbedder::generate_access_token(const std::string& refres
return Option<std::string>(access_token);
}

View File

@ -78,7 +78,7 @@ TEST_F(AnalyticsManagerTest, AddSuggestion) {
}
})"_json;
auto create_op = analyticsManager.create_rule(analytics_rule);
auto create_op = analyticsManager.create_rule(analytics_rule, false, true);
ASSERT_TRUE(create_op.ok());
std::string q = "foobar";
@ -88,4 +88,97 @@ TEST_F(AnalyticsManagerTest, AddSuggestion) {
auto userQueries = popularQueries["top_queries"]->get_user_prefix_queries()["1"];
ASSERT_EQ(1, userQueries.size());
ASSERT_EQ("foobar", userQueries[0].query);
// add another query which is more popular
q = "buzzfoo";
analyticsManager.add_suggestion("titles", q, true, "1");
analyticsManager.add_suggestion("titles", q, true, "2");
analyticsManager.add_suggestion("titles", q, true, "3");
popularQueries = analyticsManager.get_popular_queries();
userQueries = popularQueries["top_queries"]->get_user_prefix_queries()["1"];
ASSERT_EQ(2, userQueries.size());
ASSERT_EQ("foobar", userQueries[0].query);
ASSERT_EQ("buzzfoo", userQueries[1].query);
}
TEST_F(AnalyticsManagerTest, GetAndDeleteSuggestions) {
nlohmann::json analytics_rule = R"({
"name": "top_search_queries",
"type": "popular_queries",
"params": {
"limit": 100,
"source": {
"collections": ["titles"]
},
"destination": {
"collection": "top_queries"
}
}
})"_json;
auto create_op = analyticsManager.create_rule(analytics_rule, false, true);
ASSERT_TRUE(create_op.ok());
analytics_rule = R"({
"name": "top_search_queries2",
"type": "popular_queries",
"params": {
"limit": 100,
"source": {
"collections": ["titles"]
},
"destination": {
"collection": "top_queries"
}
}
})"_json;
create_op = analyticsManager.create_rule(analytics_rule, false, true);
ASSERT_TRUE(create_op.ok());
auto rules = analyticsManager.list_rules().get()["rules"];
ASSERT_EQ(2, rules.size());
ASSERT_TRUE(analyticsManager.get_rule("top_search_queries").ok());
ASSERT_TRUE(analyticsManager.get_rule("top_search_queries2").ok());
auto missing_rule_op = analyticsManager.get_rule("top_search_queriesX");
ASSERT_FALSE(missing_rule_op.ok());
ASSERT_EQ(404, missing_rule_op.code());
ASSERT_EQ("Rule not found.", missing_rule_op.error());
// upsert rule that already exists
analytics_rule = R"({
"name": "top_search_queries2",
"type": "popular_queries",
"params": {
"limit": 100,
"source": {
"collections": ["titles"]
},
"destination": {
"collection": "top_queriesUpdated"
}
}
})"_json;
create_op = analyticsManager.create_rule(analytics_rule, true, true);
ASSERT_TRUE(create_op.ok());
auto existing_rule = analyticsManager.get_rule("top_search_queries2").get();
ASSERT_EQ("top_queriesUpdated", existing_rule["params"]["destination"]["collection"].get<std::string>());
// reject when upsert is not enabled
create_op = analyticsManager.create_rule(analytics_rule, false, true);
ASSERT_FALSE(create_op.ok());
ASSERT_EQ("There's already another configuration with the name `top_search_queries2`.", create_op.error());
// try deleting both rules
analyticsManager.remove_rule("top_search_queries");
analyticsManager.remove_rule("top_search_queries2");
missing_rule_op = analyticsManager.get_rule("top_search_queries");
ASSERT_FALSE(missing_rule_op.ok());
missing_rule_op = analyticsManager.get_rule("top_search_queries2");
ASSERT_FALSE(missing_rule_op.ok());
}

View File

@ -1311,6 +1311,21 @@ TEST_F(CollectionSpecificMoreTest, UpdateArrayWithNullValue) {
auto results = coll1->search("alpha", {"tags"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(0, results["found"].get<size_t>());
// update document with no value (optional field) with a null value
auto doc3 = R"({
"id": "2"
})"_json;
ASSERT_TRUE(coll1->add(doc3.dump(), CREATE).ok());
results = coll1->search("alpha", {"tags"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(0, results["found"].get<size_t>());
doc_update = R"({
"id": "2",
"tags": null
})"_json;
ASSERT_TRUE(coll1->add(doc_update.dump(), UPDATE).ok());
// via upsert
doc_update = R"({

View File

@ -1054,11 +1054,11 @@ TEST_F(CollectionTest, KeywordQueryReturnsResultsBasedOnPerPageParam) {
ASSERT_EQ(422, res_op.code());
ASSERT_STREQ("Only upto 250 hits can be fetched per page.", res_op.error().c_str());
// when page number is not valid
res_op = coll_mul_fields->search("w", query_fields, "", facets, sort_fields, {0}, 10, 0,
FREQUENCY, {true}, 1000, empty, empty, 10);
ASSERT_FALSE(res_op.ok());
ASSERT_EQ(422, res_op.code());
// when page number is zero, use the first page
results = coll_mul_fields->search("w", query_fields, "", facets, sort_fields, {0}, 3, 0,
FREQUENCY, {true}, 1000, empty, empty, 10).get();
ASSERT_EQ(3, results["hits"].size());
ASSERT_EQ(6, results["found"].get<int>());
// do pagination
@ -3026,11 +3026,11 @@ TEST_F(CollectionTest, WildcardQueryReturnsResultsBasedOnPerPageParam) {
ASSERT_EQ(422, res_op.code());
ASSERT_STREQ("Only upto 250 hits can be fetched per page.", res_op.error().c_str());
// when page number is not valid
res_op = collection->search("*", query_fields, "", facets, sort_fields, {0}, 10, 0,
FREQUENCY, {false}, 1000, empty, empty, 10);
ASSERT_FALSE(res_op.ok());
ASSERT_EQ(422, res_op.code());
// when page number is 0, just fetch first page
results = collection->search("*", query_fields, "", facets, sort_fields, {0}, 10, 0,
FREQUENCY, {false}, 1000, empty, empty, 10).get();
ASSERT_EQ(10, results["hits"].size());
ASSERT_EQ(25, results["found"].get<int>());
// do pagination
@ -5176,4 +5176,45 @@ TEST_F(CollectionTest, EmbeddingFieldEmptyArrayInDocument) {
ASSERT_FALSE(get_op.get()["embedding"].is_null());
ASSERT_EQ(384, get_op.get()["embedding"].size());
}
TEST_F(CollectionTest, CatchPartialResponseFromRemoteEmbedding) {
std::string partial_json = R"({
"results": [
{
"embedding": [
0.0,
0.0,
0.0
],
"text": "butter"
},
{
"embedding": [
0.0,
0.0,
0.0
],
"text": "butterball"
},
{
"embedding": [
0.0,
0.0)";
nlohmann::json req_body = R"({
"inputs": [
"butter",
"butterball",
"butterfly"
]
})"_json;
OpenAIEmbedder embedder("", "");
auto res = embedder.get_error_json(req_body, 200, partial_json);
ASSERT_EQ(res["response"]["error"], "Malformed response from OpenAI API.");
ASSERT_EQ(res["request"]["body"], req_body);
}

View File

@ -768,7 +768,7 @@ TEST_F(CoreAPIUtilsTest, SearchPagination) {
ASSERT_EQ(400, results["code"].get<size_t>());
ASSERT_EQ("Parameter `offset` must be an unsigned integer.", results["error"].get<std::string>());
// when page is 0 and no offset is sent
// when page is 0 and offset is NOT sent, we treat it as page=1
search.clear();
req->params.clear();
body["searches"] = nlohmann::json::array();
@ -782,8 +782,29 @@ TEST_F(CoreAPIUtilsTest, SearchPagination) {
post_multi_search(req, res);
results = nlohmann::json::parse(res->body)["results"][0];
ASSERT_EQ(422, results["code"].get<size_t>());
ASSERT_EQ("Parameter `page` must be an integer of value greater than 0.", results["error"].get<std::string>());
ASSERT_EQ(10, results["hits"].size());
ASSERT_EQ(1, results["page"].get<size_t>());
ASSERT_EQ(0, results.count("offset"));
// when both page and offset are sent, use page
search.clear();
req->params.clear();
body["searches"] = nlohmann::json::array();
search["collection"] = "coll1";
search["q"] = "title";
search["page"] = "2";
search["offset"] = "30";
search["query_by"] = "name";
search["sort_by"] = "points:desc";
body["searches"].push_back(search);
req->body = body.dump();
post_multi_search(req, res);
results = nlohmann::json::parse(res->body)["results"][0];
ASSERT_EQ(10, results["hits"].size());
ASSERT_EQ(2, results["page"].get<size_t>());
ASSERT_EQ(0, results.count("offset"));
}
TEST_F(CoreAPIUtilsTest, ExportWithFilter) {
@ -1116,4 +1137,27 @@ TEST_F(CoreAPIUtilsTest, TestProxyInvalid) {
ASSERT_EQ(400, resp->status_code);
ASSERT_EQ("Headers must be a JSON object.", nlohmann::json::parse(resp->body)["message"]);
}
TEST_F(CoreAPIUtilsTest, TestProxyTimeout) {
nlohmann::json body;
auto req = std::make_shared<http_req>();
auto resp = std::make_shared<http_res>(nullptr);
// exercise the remote timeout path with a 1 ms budget
body["url"] = "https://typesense.org/docs/";
body["method"] = "GET";
body["headers"] = nlohmann::json::object();
body["headers"]["timeout_ms"] = "1";
body["headers"]["num_retry"] = "1";
req->body = body.dump();
post_proxy(req, resp);
ASSERT_EQ(408, resp->status_code);
ASSERT_EQ("Server error on remote server. Please try again later.", nlohmann::json::parse(resp->body)["message"]);
}

View File

@ -238,7 +238,7 @@ TEST(TokenizerTest, ShouldTokenizeLocaleText) {
// window used to locate the starting offset for snippet on the text
while(tokenizer.next(raw_token, raw_token_index, tok_start, tok_end)) {
LOG(INFO) << "tok_start: " << tok_start;
//LOG(INFO) << "tok_start: " << tok_start;
}
return ;