Mirror of https://github.com/typesense/typesense.git (synced 2025-05-17 20:22:32 +08:00)
Merge branch 'v0.24-nested' into v0.25

# Conflicts:
#	include/collection.h
#	src/collection.cpp
#	src/collection_manager.cpp
#	test/collection_faceting_test.cpp

This commit is contained in:
commit b8b9fb20b3
@@ -409,6 +409,8 @@ public:
                                       const size_t filter_curated_hits_option = 2,
                                       const bool prioritize_token_position = false,
                                       const std::string& vector_query_str = "",
+                                      const bool enable_highlight_v1 = true,
+                                      const uint64_t search_time_start_us = 0,
                                       const size_t facet_sample_percent = 100,
                                       const size_t facet_sample_threshold = 0) const;
@@ -177,12 +177,11 @@ public:

     static Option<bool> do_search(std::map<std::string, std::string>& req_params,
                                   nlohmann::json& embedded_params,
-                                  std::string& results_json_str);
+                                  std::string& results_json_str,
+                                  uint64_t start_ts);

     static bool parse_sort_by_str(std::string sort_by_str, std::vector<sort_by>& sort_fields);

-    static bool parse_vector_query_str(std::string vector_query_str, vector_query_t& vector_query);
-
     // symlinks
     Option<std::string> resolve_symlink(const std::string & symlink_name) const;
@@ -59,6 +59,8 @@ private:

     std::atomic<bool> skip_writes;

+    std::atomic<int> log_slow_searches_time_ms;
+
 protected:

     Config() {
@@ -80,6 +82,7 @@ protected:
         this->disk_used_max_percentage = 100;
         this->memory_used_max_percentage = 100;
         this->skip_writes = false;
+        this->log_slow_searches_time_ms = 30 * 1000;
     }

     Config(Config const&) {
@@ -142,6 +145,10 @@ public:
         this->log_slow_requests_time_ms = log_slow_requests_time_ms;
     }

+    void set_log_slow_searches_time_ms(int log_slow_searches_time_ms) {
+        this->log_slow_searches_time_ms = log_slow_searches_time_ms;
+    }
+
     void set_healthy_read_lag(size_t healthy_read_lag) {
         this->healthy_read_lag = healthy_read_lag;
     }
@@ -245,6 +252,10 @@ public:
         return this->log_slow_requests_time_ms;
     }

+    int get_log_slow_searches_time_ms() const {
+        return this->log_slow_searches_time_ms;
+    }
+
     size_t get_num_collections_parallel_load() const {
         return this->num_collections_parallel_load;
     }
@@ -364,6 +375,10 @@ public:
             this->log_slow_requests_time_ms = std::stoi(get_env("TYPESENSE_LOG_SLOW_REQUESTS_TIME_MS"));
         }

+        if(!get_env("TYPESENSE_LOG_SLOW_SEARCHES_TIME_MS").empty()) {
+            this->log_slow_searches_time_ms = std::stoi(get_env("TYPESENSE_LOG_SLOW_SEARCHES_TIME_MS"));
+        }
+
         if(!get_env("TYPESENSE_NUM_COLLECTIONS_PARALLEL_LOAD").empty()) {
             this->num_collections_parallel_load = std::stoi(get_env("TYPESENSE_NUM_COLLECTIONS_PARALLEL_LOAD"));
         }
@@ -513,6 +528,10 @@ public:
             this->log_slow_requests_time_ms = (int) reader.GetInteger("server", "log-slow-requests-time-ms", -1);
         }

+        if(reader.Exists("server", "log-slow-searches-time-ms")) {
+            this->log_slow_searches_time_ms = (int) reader.GetInteger("server", "log-slow-searches-time-ms", 30*1000);
+        }
+
         if(reader.Exists("server", "num-collections-parallel-load")) {
             this->num_collections_parallel_load = (int) reader.GetInteger("server", "num-collections-parallel-load", 0);
         }
@@ -643,6 +662,10 @@ public:
             this->log_slow_requests_time_ms = options.get<int>("log-slow-requests-time-ms");
         }

+        if(options.exist("log-slow-searches-time-ms")) {
+            this->log_slow_searches_time_ms = options.get<int>("log-slow-searches-time-ms");
+        }
+
         if(options.exist("num-collections-parallel-load")) {
             this->num_collections_parallel_load = options.get<uint32_t>("num-collections-parallel-load");
         }
@@ -24,6 +24,7 @@ struct export_state_t: public req_state_t {
     std::vector<size_t> offsets;
     std::set<std::string> include_fields;
     std::set<std::string> exclude_fields;
+    size_t export_batch_size = 100;
     std::string* res_body;

     bool filtered_export = false;
@@ -609,20 +609,6 @@ struct sort_by {
     }
 };

-struct vector_query_t {
-    std::string field_name;
-    size_t k = 0;
-    size_t flat_search_cutoff = 0;
-    std::vector<float> values;
-
-    void _reset() {
-        // used for testing only
-        field_name.clear();
-        k = 0;
-        values.clear();
-    }
-};
-
 class GeoPoint {
     constexpr static const double EARTH_RADIUS = 3958.75;
     constexpr static const double METER_CONVERT = 1609.00;
@@ -261,11 +261,13 @@ struct http_req {
                 chunk_len(0), body(body), body_index(0), data(nullptr), ready(false),
                 log_index(0), is_diposed(false), client_ip(client_ip) {

-        start_ts = std::chrono::duration_cast<std::chrono::microseconds>(
-                std::chrono::system_clock::now().time_since_epoch()).count();
-
         if(_req != nullptr) {
             const auto& tv = _req->processed_at.at;
             start_ts = (tv.tv_sec * 1000 * 1000) + tv.tv_usec;
             is_http_v1 = (_req->version < 0x200);
+        } else {
+            start_ts = std::chrono::duration_cast<std::chrono::microseconds>(
+                    std::chrono::system_clock::now().time_since_epoch()).count();
         }
     }
@@ -279,21 +281,40 @@ struct http_req {
                 std::chrono::system_clock::now().time_since_epoch()).count();
         uint64_t ms_since_start = (now - start_ts) / 1000;

-        std::string metric_identifier = http_method + " " + path_without_query;
+        const std::string metric_identifier = http_method + " " + path_without_query;
         AppMetrics::get_instance().increment_duration(metric_identifier, ms_since_start);
         AppMetrics::get_instance().increment_write_metrics(route_hash, ms_since_start);

-        if(config.get_log_slow_requests_time_ms() >= 0 && int(ms_since_start) >= config.get_log_slow_requests_time_ms()) {
+        bool log_slow_searches = config.get_log_slow_searches_time_ms() >= 0 &&
+                                 int(ms_since_start) >= config.get_log_slow_searches_time_ms() &&
+                                 (path_without_query == "/multi_search" ||
+                                  StringUtils::ends_with(path_without_query, "/documents/search"));
+
+        bool log_slow_requests = config.get_log_slow_requests_time_ms() >= 0 &&
+                                 int(ms_since_start) >= config.get_log_slow_requests_time_ms();
+
+        if(log_slow_searches || log_slow_requests) {
             // log slow request if logging is enabled
             std::string query_string = "?";
-            for(const auto& kv: params) {
-                if(kv.first != AUTH_HEADER) {
-                    query_string += kv.first + "=" + kv.second + "&";
+            bool is_multi_search_query = (path_without_query == "/multi_search");
+
+            if(is_multi_search_query) {
+                StringUtils::erase_char(body, '\n');
+            } else {
+                // ignore params map of multi_search since it is mutated for every search object in the POST body
+                for(const auto& kv: params) {
+                    if(kv.first != AUTH_HEADER) {
+                        query_string += kv.first + "=" + kv.second + "&";
+                    }
                 }
             }

             std::string full_url_path = metric_identifier + query_string;
-            LOG(INFO) << "SLOW REQUEST: " << "(" + std::to_string(ms_since_start) + " ms) "
-                      << client_ip << " " << full_url_path;
+
+            // NOTE: we log the `body` ONLY for multi-search query
+            LOG(INFO) << "event=slow_request, time=" << ms_since_start << " ms"
+                      << ", client_ip=" << client_ip << ", endpoint=" << full_url_path
+                      << ", body=" << (is_multi_search_query ? body : "");
         }
     }
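The hunk above splits slow-request logging into two independent thresholds: log_slow_requests_time_ms still applies to every endpoint, while the new log_slow_searches_time_ms applies only to /multi_search and */documents/search paths. A minimal sketch of how the two thresholds interact (the threshold values here are illustrative, and how the Config instance is obtained is outside this diff):

    // `config` is the server's Config instance
    config.set_log_slow_requests_time_ms(2000);   // any request slower than 2s is logged
    config.set_log_slow_searches_time_ms(500);    // search endpoints get a tighter 500ms threshold
    // an 800ms POST /multi_search is now logged as a slow search;
    // an 800ms request to a non-search endpoint is not logged at all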
@@ -27,6 +27,7 @@
 #include "id_list.h"
 #include "synonym_index.h"
 #include "override.h"
+#include "vector_query_ops.h"
 #include "hnswlib/hnswlib.h"

 static constexpr size_t ARRAY_FACET_DIM = 4;
@@ -68,9 +68,8 @@ bool or_iterator_t::intersect(std::vector<or_iterator_t>& its, result_iter_state_t& istate,

     while(its.size() == it_size && its[0].valid()) {
         num_processed++;
-        if (num_processed % 65536 == 0 &&
-            std::chrono::duration_cast<std::chrono::milliseconds>(
-                    std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) {
+        if (num_processed % 65536 == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
             search_cutoff = true;
             break;
         }
@@ -100,9 +99,8 @@ bool or_iterator_t::intersect(std::vector<or_iterator_t>& its, result_iter_state_t& istate,

     while(its.size() == it_size && !at_end2(its)) {
         num_processed++;
-        if (num_processed % 65536 == 0 &&
-            std::chrono::duration_cast<std::chrono::milliseconds>(
-                    std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) {
+        if (num_processed % 65536 == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
             search_cutoff = true;
             break;
         }
@@ -138,9 +136,8 @@ bool or_iterator_t::intersect(std::vector<or_iterator_t>& its, result_iter_state_t& istate,

     while(its.size() == it_size && !at_end(its)) {
         num_processed++;
-        if (num_processed % 65536 == 0 &&
-            std::chrono::duration_cast<std::chrono::milliseconds>(
-                    std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) {
+        if (num_processed % 65536 == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
             search_cutoff = true;
             break;
         }
@@ -211,9 +211,8 @@ bool posting_list_t::block_intersect(std::vector<posting_list_t::iterator_t>& its,
         case 1:
             while(its[0].valid()) {
                 num_processed++;
-                if (num_processed % 65536 == 0 &&
-                    std::chrono::duration_cast<std::chrono::milliseconds>(
-                            std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) {
+                if (num_processed % 65536 == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
+                    std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
                     search_cutoff = true;
                     break;
                 }
@@ -228,9 +227,8 @@ bool posting_list_t::block_intersect(std::vector<posting_list_t::iterator_t>& its,
         case 2:
             while(!at_end2(its)) {
                 num_processed++;
-                if (num_processed % 65536 == 0 &&
-                    std::chrono::duration_cast<std::chrono::milliseconds>(
-                            std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) {
+                if (num_processed % 65536 == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
+                    std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
                     search_cutoff = true;
                     break;
                 }
@@ -249,9 +247,8 @@ bool posting_list_t::block_intersect(std::vector<posting_list_t::iterator_t>& its,
         default:
             while(!at_end(its)) {
                 num_processed++;
-                if (num_processed % 65536 == 0 &&
-                    std::chrono::duration_cast<std::chrono::milliseconds>(
-                            std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) {
+                if (num_processed % 65536 == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
+                    std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
                     search_cutoff = true;
                     break;
                 }
@@ -366,6 +366,8 @@ struct StringUtils {
     static void replace_all(std::string& subject, const std::string& search,
                             const std::string& replace);

+    static void erase_char(std::string& str, const char c);
+
     static std::string trim_curly_spaces(const std::string& str);

     static bool ends_with(std::string const &str, std::string const &ending);
@@ -4,6 +4,6 @@ extern thread_local int64_t write_log_index;

 // These are used for circuit breaking search requests
 // NOTE: if you fork off main search thread, care must be taken to initialize these from parent thread values
-extern thread_local std::chrono::high_resolution_clock::time_point search_begin;
-extern thread_local int64_t search_stop_ms;
+extern thread_local uint64_t search_begin_us;
+extern thread_local uint64_t search_stop_us;
 extern thread_local bool search_cutoff;
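Because search_begin_us and search_stop_us are thread_local, a worker thread spawned mid-search starts with zero-initialized copies; the NOTE above is why call sites snapshot the parent's values before forking, as the search_wildcard and search_infix hunks later in this diff do. A minimal sketch of the pattern (the thread pool is illustrative; the variable names match the declarations above):

    // capture the parent thread's values before handing work to another thread
    const auto parent_search_begin_us = search_begin_us;
    const auto parent_search_stop_us = search_stop_us;
    thread_pool->enqueue([parent_search_begin_us, parent_search_stop_us]() {
        search_begin_us = parent_search_begin_us;   // re-seed this thread's circuit-breaker clock
        search_stop_us = parent_search_stop_us;     // and its time budget
        // ... perform the partial search work ...
    });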
include/vector_query_ops.h (new file, 32 lines)
@@ -0,0 +1,32 @@
#pragma once

#include <string>
#include <vector>
#include "option.h"

class Collection;

struct vector_query_t {
    std::string field_name;
    size_t k = 0;
    size_t flat_search_cutoff = 0;
    std::vector<float> values;

    uint32_t seq_id = 0;
    bool query_doc_given = false;

    void _reset() {
        // used for testing only
        field_name.clear();
        k = 0;
        values.clear();
        seq_id = 0;
        query_doc_given = false;
    }
};

class VectorQueryOps {
public:
    static Option<bool> parse_vector_query_str(std::string vector_query_str, vector_query_t& vector_query,
                                               const Collection* coll);
};
@@ -394,7 +394,13 @@ bool AuthManager::add_item_to_params(std::map<std::string, std::string>& req_params,
         if(req_params.count(item.key()) == 0) {
             req_params[item.key()] = str_value;
         } else if(item.key() == "filter_by") {
-            req_params[item.key()] = "(" + req_params[item.key()] + ") && (" + str_value + ")";
+            if(!req_params[item.key()].empty() && !str_value.empty()) {
+                req_params[item.key()] = "(" + req_params[item.key()] + ") && (" + str_value + ")";
+            } else if(req_params[item.key()].empty() && !str_value.empty()) {
+                req_params[item.key()] = "(" + str_value + ")";
+            } else if(!req_params[item.key()].empty() && str_value.empty()) {
+                req_params[item.key()] = "(" + req_params[item.key()] + ")";
+            }
         } else if(overwrite) {
             req_params[item.key()] = str_value;
         }
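With the old one-liner, an empty embedded or request filter produced fragments like "() && (x)". The branches above only parenthesize the non-empty sides; for example (values illustrative):

    // merging a scoped-API-key filter into an existing request filter
    req_params["filter_by"] = "user_id: 100";   // already present in the request
    // str_value = "age: > 30"                  // embedded in the scoped key
    // result: "(user_id: 100) && (age: > 30)"
    // if req_params["filter_by"] were "", the result would be just "(age: > 30)"

The new CoreAPIUtilsTest cases near the end of this diff assert exactly this behavior.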
@@ -15,6 +15,7 @@
 #include "topster.h"
 #include "logger.h"
 #include "thread_local_vars.h"
+#include "vector_query_ops.h"

 const std::string override_t::MATCH_EXACT = "exact";
 const std::string override_t::MATCH_CONTAINS = "contains";
@@ -867,14 +868,18 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
                                           const size_t filter_curated_hits_option,
                                           const bool prioritize_token_position,
                                           const std::string& vector_query_str,
+                                          const bool enable_highlight_v1,
+                                          const uint64_t search_time_start_us,
                                           const size_t facet_sample_percent,
                                           const size_t facet_sample_threshold) const {

     std::shared_lock lock(mutex);

     // setup thread local vars
-    search_stop_ms = search_stop_millis;
-    search_begin = std::chrono::high_resolution_clock::now();
+    search_stop_us = search_stop_millis * 1000;
+    search_begin_us = (search_time_start_us != 0) ? search_time_start_us :
+                      std::chrono::duration_cast<std::chrono::microseconds>(
+                            std::chrono::system_clock::now().time_since_epoch()).count();
     search_cutoff = false;

     if(raw_query != "*" && raw_search_fields.empty()) {
@@ -927,8 +932,9 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
             return Option<nlohmann::json>(400, "Vector query is supported only on wildcard (q=*) searches.");
         }

-        if(!CollectionManager::parse_vector_query_str(vector_query_str, vector_query)) {
-            return Option<nlohmann::json>(400, "The `vector_query` parameter is malformed.");
+        auto parse_vector_op = VectorQueryOps::parse_vector_query_str(vector_query_str, vector_query, this);
+        if(!parse_vector_op.ok()) {
+            return Option<nlohmann::json>(400, parse_vector_op.error());
         }

         auto vector_field_it = search_schema.find(vector_query.field_name);
@@ -1491,7 +1497,11 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
         }

         nlohmann::json wrapper_doc;
-        wrapper_doc["highlights"] = nlohmann::json::array();
+
+        if(enable_highlight_v1) {
+            wrapper_doc["highlights"] = nlohmann::json::array();
+        }

         std::vector<highlight_t> highlights;
         StringUtils string_utils;
@@ -1562,34 +1572,36 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
             prune_doc(highlight_res, hfield_names, tsl::htrie_set<char>(), "");
         }

-        std::sort(highlights.begin(), highlights.end());
+        if(enable_highlight_v1) {
+            std::sort(highlights.begin(), highlights.end());

-        for(const auto & highlight: highlights) {
-            auto field_it = search_schema.find(highlight.field);
-            if(field_it == search_schema.end() || field_it->nested) {
-                // nested field highlighting will be available only in the new highlight structure.
-                continue;
-            }
-
-            nlohmann::json h_json = nlohmann::json::object();
-            h_json["field"] = highlight.field;
-
-            if(!highlight.indices.empty()) {
-                h_json["matched_tokens"] = highlight.matched_tokens;
-                h_json["indices"] = highlight.indices;
-                h_json["snippets"] = highlight.snippets;
-                if(!highlight.values.empty()) {
-                    h_json["values"] = highlight.values;
+            for(const auto & highlight: highlights) {
+                auto field_it = search_schema.find(highlight.field);
+                if(field_it == search_schema.end() || field_it->nested) {
+                    // nested field highlighting will be available only in the new highlight structure.
+                    continue;
                 }
-            } else {
-                h_json["matched_tokens"] = highlight.matched_tokens[0];
-                h_json["snippet"] = highlight.snippets[0];
-                if(!highlight.values.empty() && !highlight.values[0].empty()) {
-                    h_json["value"] = highlight.values[0];
+
+                nlohmann::json h_json = nlohmann::json::object();
+                h_json["field"] = highlight.field;
+
+                if(!highlight.indices.empty()) {
+                    h_json["matched_tokens"] = highlight.matched_tokens;
+                    h_json["indices"] = highlight.indices;
+                    h_json["snippets"] = highlight.snippets;
+                    if(!highlight.values.empty()) {
+                        h_json["values"] = highlight.values;
+                    }
+                } else {
+                    h_json["matched_tokens"] = highlight.matched_tokens[0];
+                    h_json["snippet"] = highlight.snippets[0];
+                    if(!highlight.values.empty() && !highlight.values[0].empty()) {
+                        h_json["value"] = highlight.values[0];
+                    }
                 }
-            }
-
-            wrapper_doc["highlights"].push_back(h_json);
+
+                wrapper_doc["highlights"].push_back(h_json);
+            }
         }

         //wrapper_doc["seq_id"] = (uint32_t) field_order_kv->key;
@@ -1654,8 +1666,8 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
             facet_result["counts"] = nlohmann::json::array();

             std::vector<facet_value_t> facet_values;
-            std::vector<std::pair<int64_t, facet_count_t>> facet_hash_counts;
+            std::vector<std::pair<uint64_t, facet_count_t>> facet_hash_counts;

             for (const auto & kv : a_facet.result_map) {
                 facet_hash_counts.emplace_back(kv);
             }
@@ -630,7 +630,9 @@ Option<bool> add_unsigned_int_list_param(const std::string& param_name, const std::string& str_val,

 Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& req_params,
                                           nlohmann::json& embedded_params,
-                                          std::string& results_json_str) {
+                                          std::string& results_json_str,
+                                          uint64_t start_ts) {

     auto begin = std::chrono::high_resolution_clock::now();

     const char *NUM_TYPOS = "num_typos";
@@ -695,6 +697,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& req_params,
     const char *EXHAUSTIVE_SEARCH = "exhaustive_search";
     const char *SPLIT_JOIN_TOKENS = "split_join_tokens";

+    const char *ENABLE_HIGHLIGHT_V1 = "enable_highlight_v1";
+
     const char *FACET_SAMPLE_PERCENT = "facet_sample_percent";
     const char *FACET_SAMPLE_THRESHOLD = "facet_sample_threshold";
@@ -767,12 +771,13 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& req_params,
     size_t filter_curated_hits_option = 2;
     std::string highlight_fields;
     bool exhaustive_search = false;
-    size_t search_cutoff_ms = 3600000;
+    size_t search_cutoff_ms = 30 * 1000;
     enable_t split_join_tokens = fallback;
     size_t max_candidates = 0;
     std::vector<enable_t> infixes;
     size_t max_extra_prefix = INT16_MAX;
     size_t max_extra_suffix = INT16_MAX;
+    bool enable_highlight_v1 = true;

     size_t facet_sample_percent = 100;
     size_t facet_sample_threshold = 0;
@@ -817,6 +822,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& req_params,
         {PRE_SEGMENTED_QUERY, &pre_segmented_query},
         {EXHAUSTIVE_SEARCH, &exhaustive_search},
         {ENABLE_OVERRIDES, &enable_overrides},
+        {ENABLE_HIGHLIGHT_V1, &enable_highlight_v1},
     };

     std::unordered_map<std::string, std::vector<std::string>*> str_list_values = {
@@ -990,6 +996,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& req_params,
                                           filter_curated_hits_option,
                                           prioritize_token_position,
                                           vector_query,
+                                          enable_highlight_v1,
+                                          start_ts,
                                           facet_sample_percent,
                                           facet_sample_threshold
                                           );
@@ -1237,6 +1245,7 @@ Option<bool> CollectionManager::load_collection(const nlohmann::json &collection_meta,
     size_t num_found_docs = 0;
     size_t num_valid_docs = 0;
     size_t num_indexed_docs = 0;
+    size_t batch_doc_str_size = 0;

     auto begin = std::chrono::high_resolution_clock::now();
@@ -1245,14 +1254,17 @@ Option<bool> CollectionManager::load_collection(const nlohmann::json &collection_meta,
         const uint32_t seq_id = Collection::get_seq_id_from_key(iter->key().ToString());

         nlohmann::json document;
+        const std::string& doc_string = iter->value().ToString();

         try {
-            document = nlohmann::json::parse(iter->value().ToString());
+            document = nlohmann::json::parse(doc_string);
         } catch(const std::exception& e) {
             LOG(ERROR) << "JSON error: " << e.what();
             return Option<bool>(400, "Bad JSON.");
         }

+        batch_doc_str_size += doc_string.size();
+
         if(collection->get_enable_nested_fields()) {
             std::vector<field> flattened_fields;
             field::flatten_doc(document, collection->get_nested_fields(), true, flattened_fields);
@@ -1269,10 +1281,14 @@ Option<bool> CollectionManager::load_collection(const nlohmann::json &collection_meta,
         iter->Next();
         bool last_record = !(iter->Valid() && iter->key().starts_with(seq_id_prefix));

+        // if expected memory usage exceeds 250M, we index the accumulated set without caring about batch size
+        bool exceeds_batch_mem_threshold = ((batch_doc_str_size * 7) > (250 * 1014 * 1024));
+
         // batch must match atleast the number of shards
-        if((num_valid_docs % batch_size == 0) || last_record) {
+        if(exceeds_batch_mem_threshold || (num_valid_docs % batch_size == 0) || last_record) {
             size_t num_records = index_records.size();
             size_t num_indexed = collection->batch_index_in_memory(index_records);
+            batch_doc_str_size = 0;

             if(num_indexed != num_records) {
                 const Option<std::string> & index_error_op = get_first_index_error(index_records);
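The 7x multiplier approximates the in-memory expansion of the raw JSON during indexing, so the threshold trips once a batch's serialized documents reach roughly 250MB / 7, about 36MB, regardless of document count. (The constant reads 250 * 1014 * 1024 in the source; 1014 appears to be a typo for 1024, which would make the bound exactly 250MiB.)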
@@ -1413,112 +1429,3 @@ Option<Collection*> CollectionManager::clone_collection(const string& existing_name,

     return Option<Collection*>(new_coll);
 }
-
-bool CollectionManager::parse_vector_query_str(std::string vector_query_str, vector_query_t& vector_query) {
-    // FORMAT:
-    // field_name([0.34, 0.66, 0.12, 0.68], exact: false, k: 10)
-    size_t i = 0;
-    while(i < vector_query_str.size()) {
-        if(vector_query_str[i] != ':') {
-            vector_query.field_name += vector_query_str[i];
-            i++;
-        } else {
-            if(vector_query_str[i] != ':') {
-                // missing ":"
-                return false;
-            }
-
-            // field name is done
-            i++;
-
-            StringUtils::trim(vector_query.field_name);
-
-            while(i < vector_query_str.size() && vector_query_str[i] != '(') {
-                i++;
-            }
-
-            if(vector_query_str[i] != '(') {
-                // missing "("
-                return false;
-            }
-
-            i++;
-
-            while(i < vector_query_str.size() && vector_query_str[i] != '[') {
-                i++;
-            }
-
-            if(vector_query_str[i] != '[') {
-                // missing opening "["
-                return false;
-            }
-
-            i++;
-
-            std::string values_str;
-            while(i < vector_query_str.size() && vector_query_str[i] != ']') {
-                values_str += vector_query_str[i];
-                i++;
-            }
-
-            if(vector_query_str[i] != ']') {
-                // missing closing "]"
-                return false;
-            }
-
-            i++;
-
-            std::vector<std::string> svalues;
-            StringUtils::split(values_str, svalues, ",");
-
-            for(auto& svalue: svalues) {
-                if(!StringUtils::is_float(svalue)) {
-                    return false;
-                }
-
-                vector_query.values.push_back(std::stof(svalue));
-            }
-
-            if(i == vector_query_str.size()-1) {
-                // missing params
-                return true;
-            }
-
-            std::string param_str = vector_query_str.substr(i, (vector_query_str.size() - i));
-            std::vector<std::string> param_kvs;
-            StringUtils::split(param_str, param_kvs, ",");
-
-            for(auto& param_kv_str: param_kvs) {
-                if(param_kv_str.back() == ')') {
-                    param_kv_str.pop_back();
-                }
-
-                std::vector<std::string> param_kv;
-                StringUtils::split(param_kv_str, param_kv, ":");
-                if(param_kv.size() != 2) {
-                    return false;
-                }
-
-                if(param_kv[0] == "k") {
-                    if(!StringUtils::is_uint32_t(param_kv[1])) {
-                        return false;
-                    }
-
-                    vector_query.k = std::stoul(param_kv[1]);
-                }
-
-                if(param_kv[0] == "flat_search_cutoff") {
-                    if(!StringUtils::is_uint32_t(param_kv[1])) {
-                        return false;
-                    }
-
-                    vector_query.flat_search_cutoff = std::stoi(param_kv[1]);
-                }
-            }
-
-            return true;
-        }
-    }
-
-    return false;
-}
@@ -376,7 +376,8 @@ bool get_search(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
     }

     std::string results_json_str;
-    Option<bool> search_op = CollectionManager::do_search(req->params, req->embedded_params_vec[0], results_json_str);
+    Option<bool> search_op = CollectionManager::do_search(req->params, req->embedded_params_vec[0],
+                                                          results_json_str, req->start_ts);

     if(!search_op.ok()) {
         res->set(search_op.code(), search_op.error());
@@ -523,7 +524,8 @@ bool post_multi_search(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
         }

         std::string results_json_str;
-        Option<bool> search_op = CollectionManager::do_search(req->params, req->embedded_params_vec[i], results_json_str);
+        Option<bool> search_op = CollectionManager::do_search(req->params, req->embedded_params_vec[i],
+                                                              results_json_str, req->start_ts);

         if(search_op.ok()) {
             response["results"].push_back(nlohmann::json::parse(results_json_str));
@@ -588,6 +590,7 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
     const char* FILTER_BY = "filter_by";
     const char* INCLUDE_FIELDS = "include_fields";
     const char* EXCLUDE_FIELDS = "exclude_fields";
+    const char* BATCH_SIZE = "batch_size";

     export_state_t* export_state = nullptr;
@@ -617,6 +620,10 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
             export_state->exclude_fields = std::set<std::string>(exclude_fields_vec.begin(), exclude_fields_vec.end());
         }

+        if(req->params.count(BATCH_SIZE) != 0 && StringUtils::is_uint32_t(req->params[BATCH_SIZE])) {
+            export_state->export_batch_size = std::stoul(req->params[BATCH_SIZE]);
+        }
+
         if(simple_filter_query.empty()) {
             export_state->iter_upper_bound_key = collection->get_seq_id_collection_prefix() + "`"; // cannot inline this
             export_state->iter_upper_bound = new rocksdb::Slice(export_state->iter_upper_bound_key);
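With this parameter wired in, the chunk size of a streaming export can be tuned per request, e.g. appending `?batch_size=50` to the export endpoint's URL (collection name and value illustrative) makes each HTTP chunk carry at most 50 documents instead of the default 100 set in export_state_t.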
@@ -644,10 +651,12 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {

     if(export_state->it != nullptr) {
         rocksdb::Iterator* it = export_state->it;
+        size_t batch_counter = 0;
+        res->body.clear();

-        if(it->Valid() && it->key().ToString().compare(0, seq_id_prefix.size(), seq_id_prefix) == 0) {
+        while(it->Valid() && it->key().ToString().compare(0, seq_id_prefix.size(), seq_id_prefix) == 0) {
             if(export_state->include_fields.empty() && export_state->exclude_fields.empty()) {
-                res->body = it->value().ToString();
+                res->body += it->value().ToString();
             } else {
                 nlohmann::json doc = nlohmann::json::parse(it->value().ToString());
                 nlohmann::json filtered_doc;
@@ -663,7 +672,7 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
                 }
             }

-            res->body = filtered_doc.dump();
+            res->body += filtered_doc.dump();
         }

         it->Next();
@@ -677,10 +686,15 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
                 req->last_chunk_aggregate = true;
                 res->final = true;
             }
+
+            batch_counter++;
+            if(batch_counter == export_state->export_batch_size) {
+                break;
+            }
         }
     } else {
         bool done;
-        stateful_export_docs(export_state, 100, done);
+        stateful_export_docs(export_state, export_state->export_batch_size, done);

         if(!done) {
             req->last_chunk_aggregate = false;
@@ -474,8 +474,6 @@ int HttpServer::catch_all_handler(h2o_handler_t *_h2o_handler, h2o_req_t *req) {
         }
     }

-
-
     std::shared_ptr<http_req> request = std::make_shared<http_req>(req, rpath->http_method, path_without_query,
                                                                    route_hash, query_map, embedded_params_vec,
                                                                    api_auth_key_sent, body, client_ip);
@@ -24,17 +24,17 @@
 #include <timsort.hpp>
 #include "logger.h"

-#define RETURN_CIRCUIT_BREAKER if(std::chrono::duration_cast<std::chrono::milliseconds>(\
-                std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) { \
-                    search_cutoff = true; \
-                    return ;\
-                }
+#define RETURN_CIRCUIT_BREAKER if((std::chrono::duration_cast<std::chrono::microseconds>( \
+                std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) { \
+                    search_cutoff = true; \
+                    return ;\
+                }

-#define BREAK_CIRCUIT_BREAKER if(std::chrono::duration_cast<std::chrono::milliseconds>(\
-                std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) { \
-                    search_cutoff = true; \
-                    break;\
-                }
+#define BREAK_CIRCUIT_BREAKER if((std::chrono::duration_cast<std::chrono::microseconds>( \
+                std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) { \
+                    search_cutoff = true; \
+                    break;\
+                }

 spp::sparse_hash_map<uint32_t, int64_t> Index::text_match_sentinel_value;
 spp::sparse_hash_map<uint32_t, int64_t> Index::seq_id_sentinel_value;
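Both macros expand to an inline deadline check against the thread-local microsecond clock declared in thread_local_vars.h, so RETURN_CIRCUIT_BREAKER belongs in a void function body and BREAK_CIRCUIT_BREAKER inside a loop. A minimal sketch of the intended call pattern (the function and loop body are illustrative, not taken from this diff):

    void some_scoring_loop(const std::vector<uint32_t>& ids) {   // hypothetical caller
        for(uint32_t id : ids) {
            BREAK_CIRCUIT_BREAKER   // sets search_cutoff = true and breaks once the budget is spent
            // ... per-document scoring work ...
        }
    }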
@@ -1171,16 +1171,14 @@ void Index::tokenize_string_array_with_facets(const std::vector<std::string>& strings,
             }
         }

-        //LOG(INFO) << "Str: " << str << ", last_token: " << last_token;
+        if(is_facet) {
+            facet_hashes.push_back(facet_hash);
+        }

         if(token_set.empty()) {
             continue;
         }

-        if(is_facet) {
-            facet_hashes.push_back(facet_hash);
-        }
-
         for(auto& the_token: token_set) {
             // repeat last element to indicate end of offsets for this array index
             token_to_offsets[the_token].push_back(token_to_offsets[the_token].back());
@@ -2362,8 +2360,8 @@ void Index::search_infix(const std::string& query, const std::string& field_name,

     auto search_tree = search_index.at(field_name);

-    const auto parent_search_begin = search_begin;
-    const auto parent_search_stop_ms = search_stop_ms;
+    const auto parent_search_begin = search_begin_us;
+    const auto parent_search_stop_ms = search_stop_us;
     auto parent_search_cutoff = search_cutoff;

     for(auto infix_set: infix_sets) {
@@ -2371,7 +2369,7 @@ void Index::search_infix(const std::string& query, const std::string& field_name,
                                           &num_processed, &m_process, &cv_process,
                                           &parent_search_begin, &parent_search_stop_ms, &parent_search_cutoff]() {

-            search_begin = parent_search_begin;
+            search_begin_us = parent_search_begin;
             search_cutoff = parent_search_cutoff;
             auto op_search_stop_ms = parent_search_stop_ms/2;
@@ -2396,8 +2394,8 @@ void Index::search_infix(const std::string& query, const std::string& field_name,

             // check for search cutoff but only once every 2^10 docs to reduce overhead
             if(((num_iterated + 1) % (1 << 12)) == 0) {
-                if (std::chrono::duration_cast<std::chrono::milliseconds>(
-                        std::chrono::high_resolution_clock::now() - search_begin).count() > op_search_stop_ms) {
+                if ((std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().
+                    time_since_epoch()).count() - search_begin_us) > op_search_stop_ms) {
                     search_cutoff = true;
                     break;
                 }
@@ -2596,6 +2594,11 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,

             for (const auto& dist_label : dist_labels) {
                 uint32 seq_id = dist_label.second;
+
+                if(vector_query.query_doc_given && vector_query.seq_id == seq_id) {
+                    continue;
+                }
+
                 uint64_t distinct_id = seq_id;
                 if (group_limit != 0) {
                     distinct_id = get_distinct_id(group_by_fields, seq_id);
@@ -4386,8 +4389,8 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root,
     size_t num_queued = 0;
     size_t filter_index = 0;

-    const auto parent_search_begin = search_begin;
-    const auto parent_search_stop_ms = search_stop_ms;
+    const auto parent_search_begin = search_begin_us;
+    const auto parent_search_stop_ms = search_stop_us;
     auto parent_search_cutoff = search_cutoff;

     for(size_t thread_id = 0; thread_id < num_threads && filter_index < filter_ids_length; thread_id++) {
|
||||
batch_result_ids, batch_res_len,
|
||||
&num_processed, &m_process, &cv_process]() {
|
||||
|
||||
search_begin = parent_search_begin;
|
||||
search_stop_ms = parent_search_stop_ms;
|
||||
search_begin_us = parent_search_begin;
|
||||
search_stop_us = parent_search_stop_ms;
|
||||
search_cutoff = parent_search_cutoff;
|
||||
|
||||
size_t filter_index = 0;
|
||||
|
@@ -217,6 +217,10 @@ void StringUtils::replace_all(std::string& subject, const std::string& search, const std::string& replace) {
     }
 }

+void StringUtils::erase_char(std::string& str, const char c) {
+    str.erase(std::remove(str.begin(), str.end(), c), str.cend());
+}
+
 std::string StringUtils::trim_curly_spaces(const std::string& str) {
     std::string left_trimmed;
     int i = 0;
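erase_char is the standard erase-remove idiom: std::remove (from <algorithm>) shifts the kept characters forward and erase trims the tail in one pass. For example:

    std::string body = "line1\nline2\n";
    StringUtils::erase_char(body, '\n');   // body == "line1line2"

This is what the http_req slow-logging hunk earlier in this diff uses to flatten a multi_search POST body onto a single log line.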
@@ -262,9 +262,9 @@ nlohmann::json synonym_t::to_view_json() const {
     }

     if(!symbols.empty()) {
-        obj["symbols"] = nlohmann::json::array();
+        obj["symbols_to_index"] = nlohmann::json::array();
         for(char c: symbols) {
-            obj["symbols"].push_back(std::string(1, c));
+            obj["symbols_to_index"].push_back(std::string(1, c));
         }
     }
@@ -2,6 +2,6 @@
 #include "thread_local_vars.h"

 thread_local int64_t write_log_index = 0;
-thread_local std::chrono::high_resolution_clock::time_point search_begin;
-thread_local int64_t search_stop_ms;
+thread_local uint64_t search_begin_us;
+thread_local uint64_t search_stop_us;
 thread_local bool search_cutoff = false;
@@ -105,6 +105,8 @@ void init_cmdline_options(cmdline::parser & options, int argc, char **argv) {
     options.add<int>("memory-used-max-percentage", '\0', "Reject writes when memory usage exceeds this percentage. Default: 100 (never reject).", false, 100);
     options.add<bool>("skip-writes", '\0', "Skip all writes except config changes. Default: false.", false, false);

+    options.add<int>("log-slow-searches-time-ms", '\0', "When >= 0, searches that take longer than this duration are logged.", false, 30*1000);
+
     // DEPRECATED
     options.add<std::string>("listen-address", 'h', "[DEPRECATED: use `api-address`] Address to which Typesense API service binds.", false, "0.0.0.0");
     options.add<uint32_t>("listen-port", 'p', "[DEPRECATED: use `api-port`] Port on which Typesense API service listens.", false, 8108);
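Like its siblings, the new option can be supplied on the command line, e.g. `--log-slow-searches-time-ms=5000` to log searches slower than 5 seconds (threshold illustrative), or via the config file and TYPESENSE_LOG_SLOW_SEARCHES_TIME_MS environment variable handled in the config.h hunks above; the default stays at 30000 (30s).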
src/vector_query_ops.cpp (new file, 159 lines)
@@ -0,0 +1,159 @@
#include "vector_query_ops.h"
#include "string_utils.h"
#include "collection.h"

Option<bool> VectorQueryOps::parse_vector_query_str(std::string vector_query_str, vector_query_t& vector_query,
                                                    const Collection* coll) {
    // FORMAT:
    // field_name([0.34, 0.66, 0.12, 0.68], exact: false, k: 10)
    size_t i = 0;
    while(i < vector_query_str.size()) {
        if(vector_query_str[i] != ':') {
            vector_query.field_name += vector_query_str[i];
            i++;
        } else {
            if(vector_query_str[i] != ':') {
                // missing ":"
                return Option<bool>(400, "Malformed vector query string: `:` is missing.");
            }

            // field name is done
            i++;

            StringUtils::trim(vector_query.field_name);

            while(i < vector_query_str.size() && vector_query_str[i] != '(') {
                i++;
            }

            if(vector_query_str[i] != '(') {
                // missing "("
                return Option<bool>(400, "Malformed vector query string.");
            }

            i++;

            while(i < vector_query_str.size() && vector_query_str[i] != '[') {
                i++;
            }

            if(vector_query_str[i] != '[') {
                // missing opening "["
                return Option<bool>(400, "Malformed vector query string.");
            }

            i++;

            std::string values_str;
            while(i < vector_query_str.size() && vector_query_str[i] != ']') {
                values_str += vector_query_str[i];
                i++;
            }

            if(vector_query_str[i] != ']') {
                // missing closing "]"
                return Option<bool>(400, "Malformed vector query string.");
            }

            i++;

            std::vector<std::string> svalues;
            StringUtils::split(values_str, svalues, ",");

            for(auto& svalue: svalues) {
                if(!StringUtils::is_float(svalue)) {
                    return Option<bool>(400, "Malformed vector query string: one of the vector values is not a float.");
                }

                vector_query.values.push_back(std::stof(svalue));
            }

            if(i == vector_query_str.size()-1) {
                // missing params
                if(vector_query.values.empty()) {
                    // when query values are missing, atleast the `id` parameter must be present
                    return Option<bool>(400, "When a vector query value is empty, an `id` parameter must be present.");
                }

                return Option<bool>(true);
            }

            std::string param_str = vector_query_str.substr(i, (vector_query_str.size() - i));
            std::vector<std::string> param_kvs;
            StringUtils::split(param_str, param_kvs, ",");

            for(auto& param_kv_str: param_kvs) {
                if(param_kv_str.back() == ')') {
                    param_kv_str.pop_back();
                }

                std::vector<std::string> param_kv;
                StringUtils::split(param_kv_str, param_kv, ":");
                if(param_kv.size() != 2) {
                    return Option<bool>(400, "Malformed vector query string.");
                }

                if(param_kv[0] == "id") {
                    if(!vector_query.values.empty()) {
                        // cannot pass both vector values and id
                        return Option<bool>(400, "Malformed vector query string: cannot pass both vector query "
                                                 "and `id` parameter.");
                    }

                    Option<uint32_t> id_op = coll->doc_id_to_seq_id(param_kv[1]);
                    if(!id_op.ok()) {
                        return Option<bool>(400, "Document id referenced in vector query is not found.");
                    }

                    nlohmann::json document;
                    auto doc_op = coll->get_document_from_store(id_op.get(), document);
                    if(!doc_op.ok()) {
                        return Option<bool>(400, "Document id referenced in vector query is not found.");
                    }

                    if(!document.contains(vector_query.field_name) || !document[vector_query.field_name].is_array()) {
                        return Option<bool>(400, "Document referenced in vector query does not contain a valid "
                                                 "vector field.");
                    }

                    for(auto& fvalue: document[vector_query.field_name]) {
                        if(!fvalue.is_number_float()) {
                            return Option<bool>(400, "Document referenced in vector query does not contain a valid "
                                                     "vector field.");
                        }

                        vector_query.values.push_back(fvalue.get<float>());
                    }

                    vector_query.query_doc_given = true;
                    vector_query.seq_id = id_op.get();
                }

                if(param_kv[0] == "k") {
                    if(!StringUtils::is_uint32_t(param_kv[1])) {
                        return Option<bool>(400, "Malformed vector query string: `k` parameter must be an integer.");
                    }

                    vector_query.k = std::stoul(param_kv[1]);
                }

                if(param_kv[0] == "flat_search_cutoff") {
                    if(!StringUtils::is_uint32_t(param_kv[1])) {
                        return Option<bool>(400, "Malformed vector query string: "
                                                 "`flat_search_cutoff` parameter must be an integer.");
                    }

                    vector_query.flat_search_cutoff = std::stoi(param_kv[1]);
                }
            }

            if(!vector_query.query_doc_given && vector_query.values.empty()) {
                return Option<bool>(400, "When a vector query value is empty, an `id` parameter must be present.");
            }

            return Option<bool>(true);
        }
    }

    return Option<bool>(400, "Malformed vector query string.");
}
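Taken together with the header, the parser accepts either literal vector values or a document id, but not both. A short sketch of the accepted inputs, mirroring the grammar comment at the top of the file (the field name `vec`, document id and the `coll` pointer are illustrative):

    vector_query_t vq;
    // literal values with optional params:
    VectorQueryOps::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10)", vq, coll);   // ok
    // empty values + id: the vector is read from the stored document, which is then
    // skipped in the results via vq.seq_id / vq.query_doc_given (see the Index::search hunk above):
    VectorQueryOps::parse_vector_query_str("vec:([], id: 42)", vq, coll);                        // ok if doc 42 exists
    // passing both values and id is rejected, as is a missing '[':
    VectorQueryOps::parse_vector_query_str("vec:([0.1], id: 42)", vq, coll);                     // 400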
@@ -983,23 +983,23 @@ TEST_F(CollectionFacetingTest, FacetByNestedIntField) {

 TEST_F(CollectionFacetingTest, FacetParseTest){
     std::vector<field> fields = {
-            field("score", field_types::INT32, true),
-            field("grade", field_types::INT32, true),
-            field("rank", field_types::INT32, true),
+        field("score", field_types::INT32, true),
+        field("grade", field_types::INT32, true),
+        field("rank", field_types::INT32, true),
     };

     Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();

     std::vector<std::string> range_facet_fields {
-            "score(fail:[0, 40], pass:[40, 100])",
-            "grade(A:[80, 100], B:[60, 80], C:[40, 60])"
+        "score(fail:[0, 40], pass:[40, 100])",
+        "grade(A:[80, 100], B:[60, 80], C:[40, 60])"
     };
     std::vector<facet> range_facets;
     for(const std::string & facet_field: range_facet_fields) {
         coll1->parse_facet(facet_field, range_facets);
     }
     ASSERT_EQ(2, range_facets.size());

     ASSERT_STREQ("score", range_facets[0].field_name.c_str());
     ASSERT_TRUE(range_facets[0].is_range_query);
     ASSERT_GT(range_facets[0].facet_range_map.size(), 0);
@@ -1009,8 +1009,8 @@ TEST_F(CollectionFacetingTest, FacetParseTest){
     ASSERT_GT(range_facets[1].facet_range_map.size(), 0);

     std::vector<std::string> normal_facet_fields {
-            "score",
-            "grade"
+        "score",
+        "grade"
     };
     std::vector<facet> normal_facets;
     for(const std::string & facet_field: normal_facet_fields) {
@@ -1022,18 +1022,18 @@ TEST_F(CollectionFacetingTest, FacetParseTest){
     ASSERT_STREQ("grade", normal_facets[1].field_name.c_str());

     std::vector<std::string> mixed_facet_fields {
-            "score",
-            "grade(A:[80, 100], B:[60, 80], C:[40, 60])",
-            "rank"
+        "score",
+        "grade(A:[80, 100], B:[60, 80], C:[40, 60])",
+        "rank"
     };
     std::vector<facet> mixed_facets;
     for(const std::string & facet_field: mixed_facet_fields) {
         coll1->parse_facet(facet_field, mixed_facets);
     }
     ASSERT_EQ(3, mixed_facets.size());

     ASSERT_STREQ("score", mixed_facets[0].field_name.c_str());

     ASSERT_STREQ("grade", mixed_facets[1].field_name.c_str());
     ASSERT_TRUE(mixed_facets[1].is_range_query);
     ASSERT_GT(mixed_facets[1].facet_range_map.size(), 0);
@@ -1041,7 +1041,6 @@ TEST_F(CollectionFacetingTest, FacetParseTest){
     ASSERT_STREQ("rank", mixed_facets[2].field_name.c_str());
 }

-
 TEST_F(CollectionFacetingTest, RangeFacetTest) {
     std::vector<field> fields = {field("place", field_types::STRING, false),
                                  field("state", field_types::STRING, false),
@@ -1345,11 +1344,44 @@ TEST_F(CollectionFacetingTest, SampleFacetCounts) {
     // test for sample percent > 100

     auto res_op = coll1->search("*", {}, "", {"color"}, {}, {0}, 3, 1, FREQUENCY, {true}, 5,
-                               spp::sparse_hash_set<std::string>(),
-                               spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
-                               "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
-                               4, {off}, 3, 3, 2, 2, false, "", 200, 0);
+                                spp::sparse_hash_set<std::string>(),
+                                spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
+                                "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                                4, {off}, 3, 3, 2, 2, false, "", 200, 0);

     ASSERT_FALSE(res_op.ok());
     ASSERT_EQ("Value of `facet_sample_percent` must be less than 100.", res_op.error());
 }
+
+TEST_F(CollectionFacetingTest, FacetOnArrayFieldWithSpecialChars) {
+    std::vector<field> fields = {
+        field("tags", field_types::STRING_ARRAY, true),
+        field("points", field_types::INT32, true),
+    };
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();
+
+    nlohmann::json doc;
+    doc["tags"] = {"gamma"};
+    doc["points"] = 10;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    doc["tags"] = {"alpha", "| . |", "beta", "gamma"};
+    doc["points"] = 10;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    auto results = coll1->search("*", {},
+                                 "", {"tags"}, {}, {2}, 10, 1, FREQUENCY, {true}, 1).get();
+
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ(4, results["facet_counts"][0]["counts"].size());
+
+    for(size_t i = 0; i < results["facet_counts"][0]["counts"].size(); i++) {
+        auto fvalue = results["facet_counts"][0]["counts"][i]["value"].get<std::string>();
+        if(fvalue == "gamma") {
+            ASSERT_EQ(2, results["facet_counts"][0]["counts"][i]["count"].get<size_t>());
+        } else {
+            ASSERT_EQ(1, results["facet_counts"][0]["counts"][i]["count"].get<size_t>());
+        }
+    }
+}
@@ -526,7 +526,10 @@ TEST_F(CollectionManagerTest, VerifyEmbeddedParametersOfScopedAPIKey) {
     embedded_params["filter_by"] = "points: 200";

     std::string json_res;
-    auto search_op = collectionManager.do_search(req_params, embedded_params, json_res);
+    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::system_clock::now().time_since_epoch()).count();
+
+    auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
     ASSERT_TRUE(search_op.ok());

     nlohmann::json res_obj = nlohmann::json::parse(json_res);
@@ -540,7 +543,7 @@ TEST_F(CollectionManagerTest, VerifyEmbeddedParametersOfScopedAPIKey) {
     req_params["filter_by"] = "year: 1922";
     req_params["q"] = "*";

-    search_op = collectionManager.do_search(req_params, embedded_params, json_res);
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
     ASSERT_TRUE(search_op.ok());
     res_obj = nlohmann::json::parse(json_res);
@@ -989,43 +992,6 @@ TEST_F(CollectionManagerTest, ParseSortByClause) {
     ASSERT_FALSE(sort_by_parsed);
 }

-TEST_F(CollectionManagerTest, ParseVectorQueryString) {
-    vector_query_t vector_query;
-    bool parsed = CollectionManager::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10)", vector_query);
-    ASSERT_TRUE(parsed);
-    ASSERT_EQ("vec", vector_query.field_name);
-    ASSERT_EQ(10, vector_query.k);
-    std::vector<float> fvs = {0.34, 0.66, 0.12, 0.68};
-    ASSERT_EQ(fvs.size(), vector_query.values.size());
-    for(size_t i = 0; i < fvs.size(); i++) {
-        ASSERT_EQ(fvs[i], vector_query.values[i]);
-    }
-
-    vector_query._reset();
-    parsed = CollectionManager::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10)", vector_query);
-    ASSERT_TRUE(parsed);
-
-    vector_query._reset();
-    parsed = CollectionManager::parse_vector_query_str("vec:[0.34, 0.66, 0.12, 0.68], k: 10)", vector_query);
-    ASSERT_FALSE(parsed);
-
-    vector_query._reset();
-    parsed = CollectionManager::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10", vector_query);
-    ASSERT_TRUE(parsed);
-
-    vector_query._reset();
-    parsed = CollectionManager::parse_vector_query_str("vec:(0.34, 0.66, 0.12, 0.68, k: 10)", vector_query);
-    ASSERT_FALSE(parsed);
-
-    vector_query._reset();
-    parsed = CollectionManager::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], )", vector_query);
-    ASSERT_FALSE(parsed);
-
-    vector_query._reset();
-    parsed = CollectionManager::parse_vector_query_str("vec([0.34, 0.66, 0.12, 0.68])", vector_query);
-    ASSERT_FALSE(parsed);
-}
-
 TEST_F(CollectionManagerTest, Presets) {
     // try getting on a blank slate
     auto presets = collectionManager.get_presets();
|
@ -97,9 +97,9 @@ TEST_F(CollectionSynonymsTest, SynonymParsingFromJson) {
|
||||
ASSERT_STREQ("#", synonym_plus.synonyms[1][0].c_str());
|
||||
|
||||
nlohmann::json view_json = synonym_plus.to_view_json();
|
||||
ASSERT_EQ(2, view_json["symbols"].size());
|
||||
ASSERT_EQ("+", view_json["symbols"][0].get<std::string>());
|
||||
ASSERT_EQ("#", view_json["symbols"][1].get<std::string>());
|
||||
ASSERT_EQ(2, view_json["symbols_to_index"].size());
|
||||
ASSERT_EQ("+", view_json["symbols_to_index"][0].get<std::string>());
|
||||
ASSERT_EQ("#", view_json["symbols_to_index"][1].get<std::string>());
|
||||
|
||||
// when `id` is not given
|
||||
nlohmann::json syn_json_without_id = {
|
||||
|
@@ -144,6 +144,33 @@ TEST_F(CollectionVectorTest, BasicVectorQuerying) {
     ASSERT_FALSE(res_op.ok());
     ASSERT_EQ("Field `zec` does not have a vector query index.", res_op.error());

+    // pass `id` of existing doc instead of vector, query doc should be omitted from results
+    results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                            spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
+                            "", 10, {}, {}, {}, 0,
+                            "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                            4, {off}, 32767, 32767, 2,
+                            false, true, "vec:([], id: 1)").get();
+
+    ASSERT_EQ(2, results["found"].get<size_t>());
+    ASSERT_EQ(2, results["hits"].size());
+
+    ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
+    ASSERT_STREQ("2", results["hits"][1]["document"]["id"].get<std::string>().c_str());
+
+    // when `id` does not exist, return appropriate error
+    res_op = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                           spp::sparse_hash_set<std::string>(),
+                           spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
+                           "", 10, {}, {}, {}, 0,
+                           "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                           4, {off}, 32767, 32767, 2,
+                           false, true, "vec:([], id: 100)");
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("Document id referenced in vector query is not found.", res_op.error());
+
     // only supported with wildcard queries
     res_op = coll1->search("title", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
                            spp::sparse_hash_set<std::string>(),
@@ -199,6 +199,29 @@ TEST_F(CoreAPIUtilsTest, MultiSearchEmbeddedKeys) {
     // ensure that req params are appended to (embedded params are also rolled into req params)
     ASSERT_EQ("((user_id: 100) && (age: > 100)) && (foo: bar)", req->params["filter_by"]);

+    // when empty filter_by is present in req params, don't add ()
+    req->params["filter_by"] = "";
+    post_multi_search(req, res);
+    ASSERT_EQ("((age: > 100)) && (foo: bar)", req->params["filter_by"]);
+
+    // when empty filter_by in collection search params, don't add ()
+    req->params["filter_by"] = "user_id: 100";
+    search["filter_by"] = "";
+    body["searches"].clear();
+    body["searches"].push_back(search);
+    req->body = body.dump();
+    post_multi_search(req, res);
+    ASSERT_EQ("((user_id: 100)) && (foo: bar)", req->params["filter_by"]);
+
+    // when both are empty, don't add ()
+    req->params["filter_by"] = "";
+    search["filter_by"] = "";
+    body["searches"].clear();
+    body["searches"].push_back(search);
+    req->body = body.dump();
+    post_multi_search(req, res);
+    ASSERT_EQ("(foo: bar)", req->params["filter_by"]);
+
     // try setting max search limit
     req->embedded_params_vec[0]["limit_multi_searches"] = 0;
     ASSERT_FALSE(post_multi_search(req, res));
test/vector_query_ops_test.cpp (new file, 73 lines)
@@ -0,0 +1,73 @@
#include <gtest/gtest.h>
#include "vector_query_ops.h"

class VectorQueryOpsTest : public ::testing::Test {
protected:
    void setupCollection() {
    }

    virtual void SetUp() {
        setupCollection();
    }

    virtual void TearDown() {

    }
};

TEST_F(VectorQueryOpsTest, ParseVectorQueryString) {
    vector_query_t vector_query;
    auto parsed = VectorQueryOps::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10)", vector_query, nullptr);
    ASSERT_TRUE(parsed.ok());
    ASSERT_EQ("vec", vector_query.field_name);
    ASSERT_EQ(10, vector_query.k);
    std::vector<float> fvs = {0.34, 0.66, 0.12, 0.68};
    ASSERT_EQ(fvs.size(), vector_query.values.size());
    for (size_t i = 0; i < fvs.size(); i++) {
        ASSERT_EQ(fvs[i], vector_query.values[i]);
    }

    vector_query._reset();
    parsed = VectorQueryOps::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10)", vector_query, nullptr);
    ASSERT_TRUE(parsed.ok());

    vector_query._reset();
    parsed = VectorQueryOps::parse_vector_query_str("vec:([])", vector_query, nullptr);
    ASSERT_FALSE(parsed.ok());
    ASSERT_EQ("When a vector query value is empty, an `id` parameter must be present.", parsed.error());

    // cannot pass both vector and id
    vector_query._reset();
    parsed = VectorQueryOps::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], id: 10)", vector_query, nullptr);
    ASSERT_FALSE(parsed.ok());
    ASSERT_EQ("Malformed vector query string: cannot pass both vector query and `id` parameter.", parsed.error());

    vector_query._reset();
    parsed = VectorQueryOps::parse_vector_query_str("vec:([], k: 10)", vector_query, nullptr);
    ASSERT_FALSE(parsed.ok());
    ASSERT_EQ("When a vector query value is empty, an `id` parameter must be present.", parsed.error());

    vector_query._reset();
    parsed = VectorQueryOps::parse_vector_query_str("vec:[0.34, 0.66, 0.12, 0.68], k: 10)", vector_query, nullptr);
    ASSERT_FALSE(parsed.ok());
    ASSERT_EQ("Malformed vector query string.", parsed.error());

    vector_query._reset();
    parsed = VectorQueryOps::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10", vector_query, nullptr);
    ASSERT_TRUE(parsed.ok());

    vector_query._reset();
    parsed = VectorQueryOps::parse_vector_query_str("vec:(0.34, 0.66, 0.12, 0.68, k: 10)", vector_query, nullptr);
    ASSERT_FALSE(parsed.ok());
    ASSERT_EQ("Malformed vector query string.", parsed.error());

    vector_query._reset();
    parsed = VectorQueryOps::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], )", vector_query, nullptr);
    ASSERT_FALSE(parsed.ok());
    ASSERT_EQ("Malformed vector query string.", parsed.error());

    vector_query._reset();
    parsed = VectorQueryOps::parse_vector_query_str("vec([0.34, 0.66, 0.12, 0.68])", vector_query, nullptr);
    ASSERT_FALSE(parsed.ok());
    ASSERT_EQ("Malformed vector query string.", parsed.error());
}