From 92c38c3fa5ea0d1593fb21fe06ba792e9a119000 Mon Sep 17 00:00:00 2001
From: krunal1313 <krunal1313@gmail.com>
Date: Mon, 1 May 2023 12:01:35 +0530
Subject: [PATCH] adding tests and optimizing indexing

---
 BUILD                             |    1 +
 include/collection.h              |   10 +-
 include/facet_index.h             |    4 +-
 include/index.h                   |   23 +-
 src/collection.cpp                |   15 +-
 src/facet_index.cpp               |   43 +-
 src/field.cpp                     |    2 +-
 src/index.cpp                     |  171 +++--
 src/num_tree.cpp                  |   25 +-
 test/collection_faceting_test.cpp | 1191 ++++++++++++++++++++++++++++-
 10 files changed, 1373 insertions(+), 112 deletions(-)
diff --git a/BUILD b/BUILD
index f47ced69..a78c2f28 100644
--- a/BUILD
+++ b/BUILD
@@ -134,6 +134,7 @@ TEST_COPTS = [
     "-Wno-unused-parameter",
     "-Werror=return-type",
     "-g",
+    "-DFORCE_INTERSECTION",
 ]
 
 config_setting(
diff --git a/include/collection.h b/include/collection.h
index caaf897f..0eb820bb 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -182,8 +182,8 @@ private:
                                           std::vector<field>& new_fields,
                                           bool enable_nested_fields);
 
-    static bool facet_count_compare(const std::pair<uint64_t, facet_count_t>& a,
-                                    const std::pair<uint64_t, facet_count_t>& b) {
+    static bool facet_count_compare(const std::pair<uint32_t, facet_count_t>& a,
+                                    const std::pair<uint32_t, facet_count_t>& b) {
         return std::tie(a.second.count, a.first) > std::tie(b.second.count, b.first);
     }
 
@@ -460,7 +460,11 @@ public:
                                   const text_match_type_t match_type = max_score,
                                   const size_t facet_sample_percent = 100,
                                   const size_t facet_sample_threshold = 0,
-                                  const size_t page_offset = UINT32_MAX) const;
+                                  const size_t page_offset = UINT32_MAX
+#ifdef FORCE_INTERSECTION
+                                  , bool force_intersection = false
+#endif
+                                  ) const;
 
     Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;
 
diff --git a/include/facet_index.h b/include/facet_index.h
index cafb6c74..491148b5 100644
--- a/include/facet_index.h
+++ b/include/facet_index.h
@@ -52,7 +52,5 @@ public:
     size_t get_facet_count(const std::string& field);
 
     size_t intersect(const std::string& val, const uint32_t* result_ids, int result_id_len,
-        int max_facet_count, std::map<std::string, uint32_t>& found, bool is_wildcard_no_filter_query);
-    
-    std::string get_facet_by_count_index(const std::string& field, uint32_t count_index);
+        int max_facet_count, std::map<std::string, uint32_t>& found, bool is_wildcard_no_filter_query);    
 };
\ No newline at end of file
diff --git a/include/index.h b/include/index.h
index d045d923..d347437d 100644
--- a/include/index.h
+++ b/include/index.h
@@ -363,7 +363,11 @@ private:
                    const std::vector<facet_info_t>& facet_infos,
                    size_t group_limit, const std::vector<std::string>& group_by_fields,
                    const uint32_t* result_ids, size_t results_size,
-                   int max_facet_count, bool is_wildcard_query, bool no_filters_provided) const;
+                   int max_facet_count, bool is_wildcard_query, bool no_filters_provided
+#ifdef FORCE_INTERSECTION
+                   , bool force_intersection = false
+#endif
+                   ) const;
 
     bool static_filter_query_eval(const override_t* override, std::vector<std::string>& tokens,
                                   filter_node_t*& filter_tree_root) const;
@@ -520,6 +524,8 @@ private:
 
     static void compute_facet_stats(facet &a_facet, const std::string& raw_value, const std::string & field_type);
 
+    static void compute_facet_stats(facet &a_facet, const int64_t raw_value, const std::string & field_type);
+
     static void get_doc_changes(const index_operation_t op, nlohmann::json &update_doc,
                                 const nlohmann::json &old_doc, nlohmann::json &new_doc, nlohmann::json &del_doc);
 
@@ -631,7 +637,12 @@ public:
 
     // Public operations
 
-    Option<bool> run_search(search_args* search_params, const std::string& collection_name);
+    Option<bool> run_search(search_args* search_params, 
+                            const std::string& collection_name
+#ifdef FORCE_INTERSECTION
+                            , bool force_intersection
+#endif
+                            );
 
     Option<bool> search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,
                 const text_match_type_t match_type,
@@ -656,7 +667,11 @@ public:
                 const size_t max_extra_suffix, const size_t facet_query_num_typos,
                 const bool filter_curated_hits, enable_t split_join_tokens,
                 const vector_query_t& vector_query, size_t facet_sample_percent, size_t facet_sample_threshold,
-                const std::string& collection_name) const;
+                const std::string& collection_name
+#ifdef FORCE_INTERSECTION
+                , bool force_intersection = false
+#endif
+                ) const;
 
     void remove_field(uint32_t seq_id, const nlohmann::json& document, const std::string& field_name);
 
@@ -942,6 +957,8 @@ public:
                         uint32_t filter_ids_length, std::set<uint32_t>& curated_ids,
                         std::map<size_t, std::map<size_t, uint32_t>>& included_ids_map,
                         std::vector<uint32_t>& included_ids_vec) const;
+    
+    int64_t get_doc_val_from_sort_index(const std::string& field_name, uint32_t doc_seq_id) const;
 };
 
 template<class T>
diff --git a/src/collection.cpp b/src/collection.cpp
index 19aa663a..ecbed5ec 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1070,7 +1070,11 @@ Option<nlohmann::json> Collection::search(std::string  raw_query,
                                   const text_match_type_t match_type,
                                   const size_t facet_sample_percent,
                                   const size_t facet_sample_threshold,
-                                  const size_t page_offset) const {
+                                  const size_t page_offset
+#ifdef FORCE_INTERSECTION
+                                  , bool force_intersection
+#endif
+                                  ) const {
 
     std::shared_lock lock(mutex);
 
@@ -1520,7 +1524,12 @@ Option<nlohmann::json> Collection::search(std::string  raw_query,
 
     std::unique_ptr<search_args> search_params_guard(search_params);
 
-    auto search_op = index->run_search(search_params, name);
+    auto search_op = index->run_search(search_params, name
+#ifdef FORCE_INTERSECTION
+                                    , force_intersection
+#endif
+                                    );
+
     if (!search_op.ok()) {
         return Option<nlohmann::json>(search_op.code(), search_op.error());
     }
@@ -1937,7 +1946,7 @@ Option<nlohmann::json> Collection::search(std::string  raw_query,
             auto max_facets = std::min(max_facet_values, facet_counts.size());
             std::sort(facet_counts.begin(), facet_counts.end(), 
                 [&](const auto& p1, const auto& p2) {
-                    return p1.second > p2.second;
+                    return std::tie(p1.second, p1.first) > std::tie(p2.second, p2.first);
                 });
 
             for(int i = 0; i < max_facets; ++i) {
diff --git a/src/facet_index.cpp b/src/facet_index.cpp
index 2d49a7df..902134f2 100644
--- a/src/facet_index.cpp
+++ b/src/facet_index.cpp
@@ -39,23 +39,17 @@ uint32_t facet_index_t::insert(const std::string& field, const std::string& valu
             counter_list.emplace_back(sv, facet_count);
         } else {
             auto counter_it = counter_list.begin();
-            //remove node from list
-           for(counter_it = counter_list.begin(); counter_it != counter_list.end(); ++counter_it) {
-                if(counter_it->facet_value == sv) {
-                    //found facet in first node
-                    counter_list.erase(counter_it);
-                    break;
-                }
-            }
-
-            //find position in list and add node with updated count
+   
             count_list node(sv, facet_count); 
 
             for(counter_it = counter_list.begin(); counter_it != counter_list.end(); ++counter_it) {
-                // LOG (INFO) << "inserting in middle or front facet " << node.facet_value 
-                //     << " with count " << node.count;
-                if(counter_it->count <= facet_count) {
-                    counter_list.emplace(counter_it, node);
+                if(counter_it->facet_value == sv) {
+                    counter_it->count = facet_count;
+                    // Re-link the node toward the front until the list is ordered by descending
+                    // count again; guard begin() because std::prev(begin()) is undefined.
+                    while(counter_it != counter_list.begin() && std::prev(counter_it)->count < counter_it->count) {
+                        counter_list.splice(std::prev(counter_it), counter_list, counter_it);
+                    }
                     break;
                 }
             }
@@ -127,9 +121,6 @@ size_t facet_index_t::intersect(const std::string& field, const uint32_t* result
             ids_t::uncompress(ids, id_list);
             const auto ids_len = id_list.size();
             for(int i = 0; i < result_ids_len; ++i) {
-                // if(std::binary_search(id_list.begin(), id_list.end(), result_ids[i])) {
-                //    ++count;
-                // }
                 uint32_t* out = nullptr;
                 count = ArrayUtils::and_scalar(id_list.data(), id_list.size(),
                     result_ids, result_ids_len, &out);
@@ -148,24 +139,6 @@ size_t facet_index_t::intersect(const std::string& field, const uint32_t* result
     return found.size();
 }
 
-std::string facet_index_t::get_facet_by_count_index(const std::string& field, uint32_t count_index) {
-
-    const auto& facet_field_it = facet_field_map.find(field);
-
-    if(facet_field_it == facet_field_map.end()) {
-        return "";
-    }
-    std::string result = "";
-    auto facet_index_map = facet_field_it->second.facet_index_map;
-
-    for(auto it = facet_index_map.begin(); it != facet_index_map.end(); ++it) {
-        if(it.value().index == count_index) {
-            result = it.key();
-        }
-    }
-    return result;
-}
-
 facet_index_t::~facet_index_t() {
     facet_field_map.clear();    
 }
diff --git a/src/field.cpp b/src/field.cpp
index bcc8470a..6e1a4de6 100644
--- a/src/field.cpp
+++ b/src/field.cpp
@@ -652,7 +652,7 @@ Option<bool> field::json_field_to_field(bool enable_nested_fields, nlohmann::jso
         if(field_json["type"] == field_types::INT32 || field_json["type"] == field_types::INT64 ||
            field_json["type"] == field_types::FLOAT || field_json["type"] == field_types::BOOL ||
            field_json["type"] == field_types::GEOPOINT || field_json["type"] == field_types::GEOPOINT_ARRAY) {
-            if(field_json.count(fields::num_dim) == 0) {
+            if((field_json.count(fields::num_dim) == 0) || (field_json[fields::facet])) {
                 field_json[fields::sort] = true;
             } else {
                 field_json[fields::sort] = false;
diff --git a/src/index.cpp b/src/index.cpp
index 26d2fb49..4080e0de 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1139,12 +1139,66 @@ void Index::compute_facet_stats(facet &a_facet, const std::string& raw_value, co
     }
 }
 
+// Folds one numeric facet value, given in its raw int64 form, into a_facet.stats. INT32 values are
+// narrowed to int32_t and FLOAT values decoded via int64_t_to_float(); other types are ignored.
+void Index::compute_facet_stats(facet &a_facet, const int64_t raw_value, const std::string & field_type) {
+    auto& stats = a_facet.stats;
+
+    // Shared accumulation step; generic so each branch keeps its own value type.
+    const auto accumulate = [&stats](const auto typed_val) {
+        if(typed_val < stats.fvmin) {
+            stats.fvmin = typed_val;
+        }
+        if(typed_val > stats.fvmax) {
+            stats.fvmax = typed_val;
+        }
+        stats.fvsum += typed_val;
+        stats.fvcount++;
+    };
+
+    const bool is_int32 = (field_type == field_types::INT32 || field_type == field_types::INT32_ARRAY);
+    const bool is_int64 = (field_type == field_types::INT64 || field_type == field_types::INT64_ARRAY);
+    const bool is_float = (field_type == field_types::FLOAT || field_type == field_types::FLOAT_ARRAY);
+
+    if(is_int32) {
+        accumulate(static_cast<int32_t>(raw_value));
+        return;
+    }
+    if(is_int64) {
+        accumulate(raw_value);
+        return;
+    }
+    if(is_float) {
+        accumulate(int64_t_to_float(raw_value));
+    }
+}
+
+// Returns the value stored in sort_index for doc_seq_id under field_name.
+// Yields 0 when either lookup misses, so a missing value is indistinguishable from a stored 0.
+int64_t Index::get_doc_val_from_sort_index(const std::string& field_name, uint32_t doc_seq_id) const {
+    const auto field_it = sort_index.find(field_name);
+    if(field_it == sort_index.end()) {
+        return 0;
+    }
+
+    const auto& doc_vals = field_it->second;
+    const auto doc_it = doc_vals->find(doc_seq_id);
+    if(doc_it == doc_vals->end()) {
+        return 0;
+    }
+    return doc_it->second;
+}
+
 void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
                       bool estimate_facets, size_t facet_sample_percent,
                       const std::vector<facet_info_t>& facet_infos,
                       const size_t group_limit, const std::vector<std::string>& group_by_fields,
                       const uint32_t* result_ids, size_t results_size, 
-                      int max_facet_count, bool is_wildcard_query, bool no_filters_provided) const {
+                      int max_facet_count, bool is_wildcard_query, bool no_filters_provided
+#ifdef FORCE_INTERSECTION
+                      , bool force_intersection
+#endif
+                      ) const {
     // assumed that facet fields have already been validated upstream
     for(size_t findex=0; findex < facets.size(); findex++) {
         auto& a_facet = facets[findex];
@@ -1170,32 +1224,45 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
             }
         }
 
-        if(results_size && facet_records && (facet_records <= 10 || is_wildcard_query) &&
-            !use_facet_query && group_limit == 0 && no_filters_provided) {
+#ifdef FORCE_INTERSECTION
+        bool use_hashes = false;
+
+        if(!force_intersection) {
+            use_hashes = true;
+        }
+#endif
+
+        if(results_size && facet_records && ((facet_records <= 10 || is_wildcard_query) &&
+            !use_facet_query && group_limit == 0 && no_filters_provided)
+#ifdef FORCE_INTERSECTION
+            && !use_hashes || force_intersection
+#endif
+            ) {
             //LOG(INFO) << "Using intersection to find facets";
             a_facet.is_intersected = true;
 
             std::map<std::string, uint32_t> facet_results;
-
-            if(facet_field.is_string()) {
-                facet_index_v4->intersect(a_facet.field_name, result_ids, 
-                    results_size, max_facet_count, facet_results, is_wildcard_query & no_filters_provided);
-            } else {
-                std::map<int64_t, uint32_t> facet_counts;
-                numerical_index.at(a_facet.field_name)->intersect(result_ids, 
+            if(!facet_field.name.empty()) {
+                if(facet_field.is_string()) {
+                    facet_index_v4->intersect(a_facet.field_name, result_ids, 
+                        results_size, max_facet_count, facet_results, is_wildcard_query & no_filters_provided);
+                } else {
+                    std::map<int64_t, uint32_t> facet_counts;
+                    numerical_index.at(a_facet.field_name)->intersect(result_ids, 
                     results_size, max_facet_count, facet_counts, is_wildcard_query & no_filters_provided);
                 
-                for(const auto& kv : facet_counts) {
-                    std::string val;
-                    if(facet_field.is_float()) {
-                        val = std::to_string(int64_t_to_float(kv.first));
-                    } else if(facet_field.is_bool()) {
-                        val = kv.first == 1 ? "true" : "false";
-                    } else {
-                        val = std::to_string(kv.first);
+                    for(const auto& kv : facet_counts) {
+                        std::string val;
+                        if(facet_field.is_float()) {
+                            val = StringUtils::float_to_str(int64_t_to_float(kv.first));
+                        } else if(facet_field.is_bool()) {
+                            val = kv.first == 1 ? "true" : "false";
+                        } else {
+                            val = std::to_string(kv.first);
+                        }
+
+                        facet_results[val] = kv.second;
                     }
-                    
-                    facet_results[val] = kv.second;
                 }
             }
 
@@ -1269,32 +1336,24 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
                     RETURN_CIRCUIT_BREAKER
                 }
 
+                int64_t doc_val = 0;
                 for(size_t j = 0; j < facet_hash_count; j++) {
                     if(facet_field.is_array()) {
                         fhash = facet_map_it->second.hashes[j];
                     }
                     if(should_compute_stats) {
-                        std::string fvalue = 
-                            facet_index_v4->get_facet_by_count_index(a_facet.field_name, fhash);
-                        if(!fvalue.empty()) {
-                            compute_facet_stats(a_facet, fvalue, facet_field.type);
-                        }
+                        doc_val = get_doc_val_from_sort_index(a_facet.field_name, doc_seq_id);
+                        compute_facet_stats(a_facet, doc_val, facet_field.type);
                     }
+
                     if(a_facet.is_range_query) {
-                        auto sort_index_it = sort_index.find(a_facet.field_name);
-                        if(sort_index_it != sort_index.end()){
-                            auto doc_id_val_map = sort_index_it->second;
-                            auto doc_seq_id_it = doc_id_val_map->find(doc_seq_id);
-                            if(doc_seq_id_it != doc_id_val_map->end()){
+                        doc_val = get_doc_val_from_sort_index(a_facet.field_name, doc_seq_id); 
                             
-                                std::string doc_val = std::to_string(doc_seq_id_it->second);
-                                std::pair<std::string, std::string> range_pair {};
-                                if(a_facet.get_range(doc_val, range_pair)) {
-                                    const auto& range_id = range_pair.first;
-                                    facet_count_t& facet_count = a_facet.result_map[range_id];
-                                    facet_count.count += 1;
-                                }
-                            }
+                        std::pair<std::string, std::string> range_pair {};
+                        if(a_facet.get_range(std::to_string(doc_val), range_pair)) {
+                            const auto& range_id = range_pair.first;
+                            facet_count_t& facet_count = a_facet.result_map[range_id];
+                            facet_count.count += 1;
                         }
                     } else if(!use_facet_query || fquery_hashes.find(fhash) != fquery_hashes.end()) {
                         std::string fhash_str = std::to_string(fhash);
@@ -2288,7 +2347,11 @@ Option<bool> Index::get_approximate_reference_filter_ids_with_lock(filter_node_t
     return rearrange_filter_tree(filter_tree_root, filter_ids_length);
 }
 
-Option<bool> Index::run_search(search_args* search_params, const std::string& collection_name) {
+Option<bool> Index::run_search(search_args* search_params, const std::string& collection_name
+#ifdef FORCE_INTERSECTION
+                            , bool force_intersection
+#endif
+    ) {
     return search(search_params->field_query_tokens,
            search_params->search_fields,
            search_params->match_type,
@@ -2323,7 +2386,11 @@ Option<bool> Index::run_search(search_args* search_params, const std::string& co
            search_params->vector_query,
            search_params->facet_sample_percent,
            search_params->facet_sample_threshold,
-           collection_name);
+           collection_name
+#ifdef FORCE_INTERSECTION
+           , force_intersection
+#endif
+           );
 }
 
 void Index::collate_included_ids(const std::vector<token_t>& q_included_tokens,
@@ -2772,7 +2839,11 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
                    const bool filter_curated_hits, const enable_t split_join_tokens,
                    const vector_query_t& vector_query,
                    size_t facet_sample_percent, size_t facet_sample_threshold,
-                   const std::string& collection_name) const {
+                   const std::string& collection_name
+#ifdef FORCE_INTERSECTION
+                   , bool force_intersection
+#endif
+                    ) const {
     std::shared_lock lock(mutex);
 
     uint32_t filter_ids_length = 0;
@@ -3297,7 +3368,11 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
                                          batch_result_ids, batch_res_len, &facet_infos, max_facet_values,
                                          is_wildcard_query, no_filters_provided, estimate_facets, facet_sample_percent,
                                          &parent_search_begin, &parent_search_stop_ms, &parent_search_cutoff,
-                                         &num_processed, &m_process, &cv_process]() {
+                                         &num_processed, &m_process, &cv_process
+#ifdef FORCE_INTERSECTION
+                                         , force_intersection
+#endif
+                                         ]() {
                 search_begin_us = parent_search_begin;
                 search_stop_us = parent_search_stop_ms;
                 search_cutoff = parent_search_cutoff;
@@ -3307,7 +3382,11 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
                 do_facets(facet_batches[thread_id], fq, estimate_facets, facet_sample_percent,
                           facet_infos, group_limit, group_by_fields,
                           batch_result_ids, batch_res_len, max_facet_values, 
-                          is_wildcard_query, no_filters_provided);
+                          is_wildcard_query, no_filters_provided
+#ifdef FORCE_INTERSECTION
+                        , force_intersection
+#endif
+                          );
                 std::unique_lock<std::mutex> lock(m_process);
                 num_processed++;
                 parent_search_cutoff = parent_search_cutoff || search_cutoff;
@@ -3392,7 +3471,11 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
                         max_candidates, facet_infos);
     do_facets(facets, facet_query, estimate_facets, facet_sample_percent,
               facet_infos, group_limit, group_by_fields, &included_ids_vec[0], 
-              included_ids_vec.size(), max_facet_values, is_wildcard_query, no_filters_provided);
+              included_ids_vec.size(), max_facet_values, is_wildcard_query, no_filters_provided
+#ifdef FORCE_INTERSECTION
+              , force_intersection
+#endif
+              );
 
     all_result_ids_len += curated_topster->size;
 
diff --git a/src/num_tree.cpp b/src/num_tree.cpp
index 6e433eb1..7f5aabd4 100644
--- a/src/num_tree.cpp
+++ b/src/num_tree.cpp
@@ -22,23 +22,17 @@ void num_tree_t::insert(int64_t value, uint32_t id, bool is_facet) {
             counter_list.emplace_back(value, facet_count);
         } else {
             auto counter_it = counter_list.begin();
-            //remove node from list
-           for(counter_it = counter_list.begin(); counter_it != counter_list.end(); ++counter_it) {
-                if(counter_it->facet_value == value) {
-                    //found facet in first node
-                    counter_list.erase(counter_it);
-                    break;
-                }
-            }
-    
-            //find position in list and add node with updated count
+          
             count_list node(value, facet_count); 
     
             for(counter_it = counter_list.begin(); counter_it != counter_list.end(); ++counter_it) {
-                // LOG (INFO) << "inserting in middle or front facet " << node.facet_value 
-                //     << " with count " << node.count;
-                if(counter_it->count <= facet_count) {
-                    counter_list.emplace(counter_it, node);
+                if(counter_it->facet_value == value) {
+                    counter_it->count = facet_count;
+                    // Re-link the node toward the front until the list is ordered by descending
+                    // count again; guard begin() because std::prev(begin()) is undefined.
+                    while(counter_it != counter_list.begin() && std::prev(counter_it)->count < counter_it->count) {
+                        counter_list.splice(std::prev(counter_it), counter_list, counter_it);
+                    }
                     break;
                 }
             }
@@ -389,9 +383,6 @@ size_t num_tree_t::intersect(const uint32_t* result_ids, int result_ids_len, int
             ids_t::uncompress(ids, id_list);
             const auto ids_len = id_list.size();
             for(int i = 0; i < result_ids_len; ++i) {
-                // if(std::binary_search(id_list.begin(), id_list.end(), result_ids[i])) {
-                //    ++count;
-                // }
                 uint32_t* out = nullptr;
                 count = ArrayUtils::and_scalar(id_list.data(), id_list.size(),
                     result_ids, result_ids_len, &out);
diff --git a/test/collection_faceting_test.cpp b/test/collection_faceting_test.cpp
index 649a91d0..63081292 100644
--- a/test/collection_faceting_test.cpp
+++ b/test/collection_faceting_test.cpp
@@ -252,7 +252,7 @@ TEST_F(CollectionFacetingTest, FacetCounts) {
     ASSERT_FLOAT_EQ(5, results["facet_counts"][0]["stats"]["total_values"].get<size_t>());
 
     // check for "0" case
-    ASSERT_STREQ("0.000000", results["facet_counts"][0]["counts"][2]["value"].get<std::string>().c_str());
+    ASSERT_STREQ("0", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
     ASSERT_EQ(1, results["facet_counts"][0]["counts"][0]["count"].get<size_t>());
 
     // facet query on a float field
@@ -506,8 +506,8 @@ TEST_F(CollectionFacetingTest, FacetCountsFloatPrecision) {
 
     ASSERT_STREQ("points", results["facet_counts"][0]["field_name"].get<std::string>().c_str());
     ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][0]["count"]);
-    ASSERT_STREQ("113.400002", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
-    ASSERT_STREQ("113.400002",results["facet_counts"][0]["counts"][0]["highlighted"].get<std::string>().c_str());
+    ASSERT_STREQ("113.4", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
+    ASSERT_STREQ("113.4",results["facet_counts"][0]["counts"][0]["highlighted"].get<std::string>().c_str());
 
     collectionManager.drop_collection("coll1");
 }
@@ -1574,3 +1574,1188 @@ TEST_F(CollectionFacetingTest, FacetOnArrayFieldWithSpecialChars) {
         }
     }
 }
+
+
+class CollectionOptimizedFacetingTest : public ::testing::Test { // fixture: each test starts from a freshly wiped Store
+protected:
+    Store *store;
+    CollectionManager & collectionManager = CollectionManager::get_instance();
+    std::atomic<bool> quit = false;
+    // Members available to the test bodies (several tests declare a local `sort_fields` that shadows the member).
+    std::vector<std::string> query_fields;
+    std::vector<sort_by> sort_fields;
+    // Recreates an empty state directory on disk, opens a Store on it, then calls init() and load() on the manager.
+    void setupCollection() {
+        std::string state_dir_path = "/tmp/typesense_test/collection_optimized_faceting";
+        LOG(INFO) << "Truncating and creating: " << state_dir_path;
+        system(("rm -rf "+state_dir_path+" && mkdir -p "+state_dir_path).c_str());
+        // `store` is allocated here and released in TearDown().
+        store = new Store(state_dir_path);
+        collectionManager.init(store, 1.0, "auth_key", quit);
+        collectionManager.load(8, 1000);
+    }
+    // gtest hook that runs before each test.
+    virtual void SetUp() {
+        setupCollection();
+    }
+    // gtest hook that runs after each test: disposes the manager, then deletes the Store from setupCollection().
+    virtual void TearDown() {
+        collectionManager.dispose();
+        delete store;
+    }
+};
+
+TEST_F(CollectionOptimizedFacetingTest, FacetCounts) {
+    Collection *coll_array_fields;
+    // Facet checks on the numeric_array_documents fixture: counts, count limit, wildcards, float stats and facet_query errors.
+    std::ifstream infile(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
+    std::vector<field> fields = {field("name", field_types::STRING, false),
+                                 field("name_facet", field_types::STRING, true),
+                                 field("age", field_types::INT32, true),
+                                 field("years", field_types::INT32_ARRAY, true),
+                                 field("rating", field_types::FLOAT, true),
+                                 field("timestamps", field_types::INT64_ARRAY, true),
+                                 field("tags", field_types::STRING_ARRAY, true),
+                                 field("optional_facet", field_types::INT64_ARRAY, true, true),};
+
+    std::vector<sort_by> sort_fields = { sort_by("age", "DESC") };
+
+    coll_array_fields = collectionManager.get_collection("coll_array_fields").get();
+    if(coll_array_fields == nullptr) {
+        coll_array_fields = collectionManager.create_collection("coll_array_fields", 4, fields, "age").get();
+    }
+    // Index every line of the fixture file, copying `name` into `name_facet` first.
+    std::string json_line;
+
+    while (std::getline(infile, json_line)) {
+        nlohmann::json document = nlohmann::json::parse(json_line);
+        document["name_facet"] = document["name"];
+        const std::string & patched_json_line = document.dump();
+        coll_array_fields->add(patched_json_line);
+    }
+
+    infile.close();
+
+    query_fields = {"name"};
+    std::vector<std::string> facets = {"tags"};
+    // NOTE(review): every search() call below ends with an extra `true` after 4294967295UL; presumably the flag this patch adds after page_offset - confirm against collection.h.
+    // single facet with no filters
+    nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields, 
+                                                        {0}, 10, 1, FREQUENCY, {false}, 1UL, 
+                                                        spp::sparse_hash_set<std::string>(),
+                                                        spp::sparse_hash_set<std::string>(), 
+                                                        10UL, "", 30UL, 4UL, "", 1UL, "", "", {}, 
+                                                        3UL, "<mark>", "</mark>", {}, 4294967295UL, true, 
+                                                        false, true, "", false, 6000000UL, 4UL, 7UL, fallback,
+                                                        4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false, "", true, 
+                                                        0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();    
+
+    ASSERT_EQ(5, results["hits"].size());
+
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ(4, results["facet_counts"][0].size());
+    ASSERT_EQ("tags", results["facet_counts"][0]["field_name"]);
+    ASSERT_EQ(false, results["facet_counts"][0]["sampled"].get<bool>());
+    ASSERT_EQ(4, results["facet_counts"][0]["counts"].size());
+    ASSERT_EQ(1, results["facet_counts"][0]["stats"].size());
+    ASSERT_EQ(4, results["facet_counts"][0]["stats"]["total_values"].get<size_t>());
+
+    ASSERT_STREQ("gold", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
+    ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][0]["count"]);
+
+    ASSERT_STREQ("silver", results["facet_counts"][0]["counts"][1]["value"].get<std::string>().c_str());
+    ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][1]["count"]);
+
+    ASSERT_STREQ("bronze", results["facet_counts"][0]["counts"][2]["value"].get<std::string>().c_str());
+    ASSERT_EQ(2, (int) results["facet_counts"][0]["counts"][2]["count"]);
+
+    ASSERT_STREQ("FINE PLATINUM", results["facet_counts"][0]["counts"][3]["value"].get<std::string>().c_str());
+    ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][3]["count"]);
+
+    // facet with facet count limit
+    results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields, {0}, 10, 1,
+                                        FREQUENCY, {false}, 10, spp::sparse_hash_set<std::string>(),
+                                        spp::sparse_hash_set<std::string>(), 2, "", 30UL, 4UL, "", 1UL, 
+                                        "", "", {}, 3UL, "<mark>", "</mark>", {}, 4294967295UL, true, 
+                                        false, true, "", false, 6000000UL, 4UL, 7UL, fallback, 4UL, {off}, 
+                                        32767UL, 32767UL, 2UL, 2UL, false, "", true, 0UL, max_score, 100UL,
+                                        0UL, 4294967295UL, true).get();  
+    
+    ASSERT_EQ(5, results["hits"].size());
+
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_STREQ("tags", results["facet_counts"][0]["field_name"].get<std::string>().c_str());
+    ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
+
+    ASSERT_STREQ("gold", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
+    ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][0]["count"]);
+
+    ASSERT_STREQ("silver", results["facet_counts"][0]["counts"][1]["value"].get<std::string>().c_str());
+    ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][1]["count"]);
+
+    // 2 facets, 1 text query with no filters
+    facets.clear();
+    facets.push_back("tags");
+    facets.push_back("name_facet");
+    results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields, 
+                                                        {0}, 10, 1, FREQUENCY, {false}, 1UL, 
+                                                        spp::sparse_hash_set<std::string>(),
+                                                        spp::sparse_hash_set<std::string>(), 
+                                                        10UL, "", 30UL, 4UL, "", 1UL, "", "", {}, 
+                                                        3UL, "<mark>", "</mark>", {}, 4294967295UL, true, 
+                                                        false, true, "", false, 6000000UL, 4UL, 7UL, fallback,
+                                                        4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false, "", true, 
+                                                        0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();    
+
+    ASSERT_EQ(5, results["hits"].size());
+    ASSERT_EQ(2, results["facet_counts"].size());
+
+    ASSERT_STREQ("tags", results["facet_counts"][0]["field_name"].get<std::string>().c_str());
+    ASSERT_STREQ("name_facet", results["facet_counts"][1]["field_name"].get<std::string>().c_str());
+
+    // facet value must be the one that's stored, not indexed (i.e. no tokenization/standardization)
+    ASSERT_STREQ("Jeremy Howard", results["facet_counts"][1]["counts"][0]["value"].get<std::string>().c_str());
+    ASSERT_EQ(5, (int) results["facet_counts"][1]["counts"][0]["count"]);
+
+    // facet with wildcard
+    results = coll_array_fields->search("Jeremy", query_fields, "", {"ag*"}, sort_fields, {0}, 10, 1, FREQUENCY,
+                                        {false},  1UL, spp::sparse_hash_set<std::string>(),
+                                        spp::sparse_hash_set<std::string>(), 
+                                        10UL, "", 30UL, 4UL, "", 1UL, "", "", {}, 
+                                        3UL, "<mark>", "</mark>", {}, 4294967295UL, true, 
+                                        false, true, "", false, 6000000UL, 4UL, 7UL, fallback,
+                                        4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false, "", true, 
+                                        0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(5, results["hits"].size());
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_STREQ("age", results["facet_counts"][0]["field_name"].get<std::string>().c_str());
+
+    // facet on a float field without query to check on stats
+    results = coll_array_fields->search("*", query_fields, "", {"rating"}, sort_fields, {0}, 10, 1, FREQUENCY,
+                                        {false}, Index::DROP_TOKENS_THRESHOLD,
+                                        spp::sparse_hash_set<std::string>(),
+                                        spp::sparse_hash_set<std::string>(), 10, "", 30UL, 4UL, 
+                                        "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                        4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                        7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(5, results["facet_counts"][0]["stats"].size());
+    ASSERT_FLOAT_EQ(4.880199885368347, results["facet_counts"][0]["stats"]["avg"].get<double>());
+    ASSERT_FLOAT_EQ(0.0, results["facet_counts"][0]["stats"]["min"].get<double>());
+    ASSERT_FLOAT_EQ(9.99899959564209, results["facet_counts"][0]["stats"]["max"].get<double>());
+    ASSERT_FLOAT_EQ(24.400999426841736, results["facet_counts"][0]["stats"]["sum"].get<double>());
+    ASSERT_FLOAT_EQ(5, results["facet_counts"][0]["stats"]["total_values"].get<size_t>());
+
+    // check for "0" case: the zero rating must come back as the string "0"
+    ASSERT_STREQ("0", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
+    ASSERT_EQ(1, results["facet_counts"][0]["counts"][0]["count"].get<size_t>());
+
+    facets.clear();
+    facets.push_back("tags");
+    
+    // empty facet query value should return all facets without any filtering of facets
+    results = coll_array_fields->search("*", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY,
+                                       {false}, Index::DROP_TOKENS_THRESHOLD,
+                                       spp::sparse_hash_set<std::string>(),
+                                       spp::sparse_hash_set<std::string>(), 10, "tags: ", 30UL, 4UL, 
+                                        "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                        4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                        7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(5, results["hits"].size());
+
+    results = coll_array_fields->search("*", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY,
+                                       {false}, Index::DROP_TOKENS_THRESHOLD,
+                                       spp::sparse_hash_set<std::string>(),
+                                       spp::sparse_hash_set<std::string>(), 10, "tags:", 30UL, 4UL, 
+                                        "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                        4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                        7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(5, results["hits"].size());
+
+    // Wildcard facet_by can have partial matches
+    results = coll_array_fields->search("*", query_fields, "", {"nam*"}, sort_fields, {0}, 10, 1, FREQUENCY,
+                                        {false}, Index::DROP_TOKENS_THRESHOLD,
+                                        spp::sparse_hash_set<std::string>(),
+                                        spp::sparse_hash_set<std::string>(), 10, "", 30UL, 4UL, 
+                                        "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                        4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                        7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(5, results["hits"].size());
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ("name_facet", results["facet_counts"][0]["field_name"].get<std::string>());
+
+    // Wildcard facet_by having no counts should not be returned
+    results = coll_array_fields->search("*", query_fields, "", {"optio*"}, sort_fields, {0}, 10, 1, FREQUENCY,
+                                        {false}, Index::DROP_TOKENS_THRESHOLD,
+                                        spp::sparse_hash_set<std::string>(),
+                                        spp::sparse_hash_set<std::string>(), 10, "", 30UL, 4UL, 
+                                        "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                        4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                        7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(5, results["hits"].size());
+    ASSERT_EQ(0, results["facet_counts"].size());
+    // ...whereas naming the same field explicitly still returns a facet_counts entry for it
+    results = coll_array_fields->search("*", query_fields, "", {"optional_facet"}, sort_fields, {0}, 10, 1, FREQUENCY,
+                                        {false}, Index::DROP_TOKENS_THRESHOLD,
+                                        spp::sparse_hash_set<std::string>(),
+                                        spp::sparse_hash_set<std::string>(), 10, "", 30UL, 4UL, 
+                                        "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                        4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                        7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(5, results["hits"].size());
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ("optional_facet", results["facet_counts"][0]["field_name"].get<std::string>());
+
+    // bad facet query syntax
+    auto res_op = coll_array_fields->search("*", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY,
+                                            {false}, Index::DROP_TOKENS_THRESHOLD,
+                                            spp::sparse_hash_set<std::string>(),
+                                            spp::sparse_hash_set<std::string>(), 10, "foobar", 30UL, 4UL, 
+                                            "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                            4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                            7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                            "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_STREQ("Facet query must be in the `facet_field: value` format.", res_op.error().c_str());
+
+    // unknown facet field
+    res_op = coll_array_fields->search("*", query_fields, "", {"foobar"}, sort_fields, {0}, 10, 1, FREQUENCY,
+                                       {false}, Index::DROP_TOKENS_THRESHOLD,
+                                       spp::sparse_hash_set<std::string>(),
+                                       spp::sparse_hash_set<std::string>(), 10, "foobar: baz",  30UL, 4UL, 
+                                        "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                        4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                        7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_STREQ("Could not find a facet field named `foobar` in the schema.", res_op.error().c_str());
+
+    // only prefix matching is valid
+    res_op = coll_array_fields->search("*", query_fields, "", {"*_facet"}, sort_fields, {0}, 10, 1, FREQUENCY,
+                                       {false}, Index::DROP_TOKENS_THRESHOLD,
+                                       spp::sparse_hash_set<std::string>(),
+                                       spp::sparse_hash_set<std::string>(), 10, "",  30UL, 4UL, 
+                                        "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                        4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                        7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_STREQ("Only prefix matching with a wildcard is allowed.", res_op.error().c_str());
+
+    // unknown wildcard facet field
+    res_op = coll_array_fields->search("*", query_fields, "", {"foo*"}, sort_fields, {0}, 10, 1, FREQUENCY,
+                                       {false}, Index::DROP_TOKENS_THRESHOLD,
+                                       spp::sparse_hash_set<std::string>(),
+                                       spp::sparse_hash_set<std::string>(), 10, "", 30UL, 4UL, 
+                                        "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                        4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                        7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_STREQ("Could not find a facet field for `foo*` in the schema.", res_op.error().c_str());
+
+    // when facet query is given but no facet fields are specified, must return an error message
+    res_op = coll_array_fields->search("*", query_fields, "", {}, sort_fields, {0}, 10, 1, FREQUENCY,
+                                       {false}, Index::DROP_TOKENS_THRESHOLD,
+                                       spp::sparse_hash_set<std::string>(),
+                                       spp::sparse_hash_set<std::string>(), 10, "tags: foo", 30UL, 4UL, 
+                                        "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                        4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                        7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_STREQ("The `facet_query` parameter is supplied without a `facet_by` parameter.", res_op.error().c_str());
+    // an empty facet field name is reported as an unknown facet field
+    res_op = coll_array_fields->search("*", query_fields, "", {""}, sort_fields, {0}, 10, 1, FREQUENCY,
+                                       {false}, Index::DROP_TOKENS_THRESHOLD,
+                                       spp::sparse_hash_set<std::string>(),
+                                       spp::sparse_hash_set<std::string>(), 10, "tags: foo",  30UL, 4UL, 
+                                        "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                        4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                        7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_STREQ("Could not find a facet field named `` in the schema.", res_op.error().c_str());
+
+    // given facet query field must be part of facet fields requested
+    res_op = coll_array_fields->search("*", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY,
+                                       {false}, Index::DROP_TOKENS_THRESHOLD,
+                                       spp::sparse_hash_set<std::string>(),
+                                       spp::sparse_hash_set<std::string>(), 10, "name_facet: jeremy", 30UL, 4UL, 
+                                        "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                        4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                        7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_STREQ("Facet query refers to a facet field `name_facet` that is not part of `facet_by` parameter.", res_op.error().c_str());
+
+    collectionManager.drop_collection("coll_array_fields");
+}
+
+TEST_F(CollectionOptimizedFacetingTest, FacetCountsBool) {
+    Collection *coll1;
+    // Faceting on a bool field: both docs matching "Ford" are in stock, so one "true" bucket with count 2 is expected.
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),
+                                 field("in_stock", field_types::BOOL, true)};
+
+    std::vector<sort_by> sort_fields = {sort_by("points", "DESC")};
+
+    coll1 = collectionManager.get_collection("coll1").get();
+    if (coll1 == nullptr) {
+        coll1 = collectionManager.create_collection("coll1", 4, fields, "points").get();
+    }
+
+    nlohmann::json doc;
+    doc["id"] = "100";
+    doc["title"] = "Ford Mustang";
+    doc["points"] = 25;
+    doc["in_stock"] = true;
+
+    coll1->add(doc.dump());
+
+    doc["id"] = "101";
+    doc["title"] = "Tesla Model S";
+    doc["points"] = 40;
+    doc["in_stock"] = false;
+
+    coll1->add(doc.dump());
+
+    doc["id"] = "102";
+    doc["title"] = "Ford Mustang GT";
+    doc["points"] = 10;
+    doc["in_stock"] = true;
+
+    coll1->add(doc.dump());
+
+    std::vector<std::string> facets = {"in_stock"};
+
+    nlohmann::json results = coll1->search("Ford", {"title"}, "", facets, sort_fields, {0}, 10, 1,
+                                           token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                                           spp::sparse_hash_set<std::string>(), 10,"",  30UL, 4UL, 
+                                           "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                           4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                           7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                           "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
+    ASSERT_EQ(1, results["facet_counts"][0]["stats"].size());
+    ASSERT_FLOAT_EQ(1, results["facet_counts"][0]["stats"]["total_values"].get<size_t>());
+
+    ASSERT_STREQ("in_stock", results["facet_counts"][0]["field_name"].get<std::string>().c_str());
+    ASSERT_EQ(2, (int) results["facet_counts"][0]["counts"][0]["count"]);
+    ASSERT_STREQ("true", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionOptimizedFacetingTest, FacetCountsFloatPrecision) {
+    Collection *coll1;
+    // The float value 113.4 must come back as the string "113.4" in both `value` and `highlighted`.
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("points", field_types::FLOAT, true)};
+
+    std::vector<sort_by> sort_fields = {sort_by("points", "DESC")};
+
+    coll1 = collectionManager.get_collection("coll1").get();
+    if (coll1 == nullptr) {
+        coll1 = collectionManager.create_collection("coll1", 4, fields, "points").get();
+    }
+
+    nlohmann::json doc;
+    doc["id"] = "100";
+    doc["title"] = "Ford Mustang";
+    doc["points"] = 113.4;
+
+    coll1->add(doc.dump());
+
+    std::vector<std::string> facets = {"points"};
+
+    nlohmann::json results = coll1->search("*", {"title"}, "", facets, sort_fields, {0}, 10, 1,
+                                           token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                                           spp::sparse_hash_set<std::string>(), 10,"",  30UL, 4UL, 
+                                           "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                           4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                           7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                           "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
+
+    ASSERT_STREQ("points", results["facet_counts"][0]["field_name"].get<std::string>().c_str());
+    ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][0]["count"]);
+    ASSERT_STREQ("113.4", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
+    ASSERT_STREQ("113.4",results["facet_counts"][0]["counts"][0]["highlighted"].get<std::string>().c_str());
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionOptimizedFacetingTest, FacetStatOnFloatFields) {
+    Collection *coll_float_fields;
+    // Checks the min/max/sum/avg/total_values stats of the float facet `average` over the hits for "Jeremy".
+    std::ifstream infile(std::string(ROOT_DIR)+"test/float_documents.jsonl");
+    std::vector<field> fields = {
+            field("title", field_types::STRING, false),
+            field("score", field_types::FLOAT, false),
+            field("average", field_types::FLOAT, true)
+    };
+
+    std::vector<sort_by> sort_fields_desc = { sort_by("average", "DESC") };
+    // NOTE(review): unlike the sibling tests, this one never calls drop_collection(); presumably cleanup is left to the fixture - confirm.
+    coll_float_fields = collectionManager.get_collection("coll_float_fields").get();
+    if(coll_float_fields == nullptr) {
+        coll_float_fields = collectionManager.create_collection("coll_float_fields", 4, fields, "average").get();
+    }
+
+    std::string json_line;
+
+    while (std::getline(infile, json_line)) {
+        coll_float_fields->add(json_line);
+    }
+
+    infile.close();
+
+    query_fields = {"title"};
+    auto res_op = coll_float_fields->search("Jeremy", query_fields, "", {"average"}, sort_fields_desc, {0}, 10,
+                                            1, FREQUENCY, {false}, 10, spp::sparse_hash_set<std::string>(),
+                                            spp::sparse_hash_set<std::string>(), 10, "", 30UL, 4UL, 
+                                            "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                            4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                            7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                            "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    auto results = res_op.get();
+
+    ASSERT_EQ(7, results["hits"].size());
+    // The stats are compared with ASSERT_FLOAT_EQ, i.e. approximately, at float rather than double precision.
+    ASSERT_EQ(5, results["facet_counts"][0]["stats"].size());
+    ASSERT_FLOAT_EQ(-21.3799991607666, results["facet_counts"][0]["stats"]["min"].get<double>());
+    ASSERT_FLOAT_EQ(300, results["facet_counts"][0]["stats"]["max"].get<double>());
+    ASSERT_FLOAT_EQ(277.8160007725237, results["facet_counts"][0]["stats"]["sum"].get<double>());
+    ASSERT_FLOAT_EQ(39.68800011036053, results["facet_counts"][0]["stats"]["avg"].get<double>());
+    ASSERT_FLOAT_EQ(7, results["facet_counts"][0]["stats"]["total_values"].get<size_t>());
+}
+
+TEST_F(CollectionOptimizedFacetingTest, FacetCountOnSimilarStrings) {
+    Collection *coll1;
+    // Two facet values made of the same words in a different order must be reported as two distinct values.
+    std::vector<field> fields = {field("categories", field_types::STRING_ARRAY, true),
+                                 field("points", field_types::INT32, true)};
+
+    std::vector<sort_by> sort_fields = {sort_by("points", "DESC")};
+
+    coll1 = collectionManager.get_collection("coll1").get();
+    if (coll1 == nullptr) {
+        coll1 = collectionManager.create_collection("coll1", 4, fields, "points").get();
+    }
+
+    nlohmann::json doc;
+    doc["id"] = "100";
+    doc["categories"] = {"England in India"};
+    doc["points"] = 25;
+
+    coll1->add(doc.dump());
+
+    doc["id"] = "101";
+    doc["categories"] = {"India in England"};
+    doc["points"] = 50;
+
+    coll1->add(doc.dump());
+
+    std::vector<std::string> facets = {"categories"};
+
+    nlohmann::json results = coll1->search("*", {"categories"}, "", facets, sort_fields, {0}, 10, 1,
+                                           token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                                           spp::sparse_hash_set<std::string>(), 10, "", 30UL, 4UL, 
+                                            "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {}, 
+                                            4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                            7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                            "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(2, results["hits"].size());
+    ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
+
+    ASSERT_STREQ("India in England", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
+    ASSERT_STREQ("England in India", results["facet_counts"][0]["counts"][1]["value"].get<std::string>().c_str());
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionOptimizedFacetingTest, FacetByNestedIntField) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "enable_nested_fields": true,
+        "fields": [
+          {"name": "details", "type": "object", "optional": false },
+          {"name": "company.num_employees", "type": "int32", "optional": false, "facet": true },
+          {"name": "companyRank", "type": "int32", "optional": false, "facet": true }
+        ]
+    })"_json;
+    // Facets on the nested int field `company.num_employees`, then checks wildcard expansion through parse_facet().
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection* coll1 = op.get();
+
+    auto doc1 = R"({
+        "details": {"count": 1000},
+        "company": {"num_employees": 2000},
+        "companyRank": 100
+    })"_json;
+
+    auto doc2 = R"({
+        "details": {"count": 2000},
+        "company": {"num_employees": 2000},
+        "companyRank": 101
+    })"_json;
+
+    ASSERT_TRUE(coll1->add(doc1.dump(), CREATE).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump(), CREATE).ok());
+
+    std::vector<sort_by> sort_fields = { sort_by("details.count", "ASC") };
+
+    auto results = coll1->search("*", {}, "", {"company.num_employees"}, sort_fields, {0}, 10, 1,
+                                 token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4,  "", 1UL, "", "", {}, 3UL, 
+                                 "<mark>", "</mark>", {}, 4294967295UL, true, false, true, "", false, 6000000UL, 
+                                 4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                 "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(2, results["found"].get<size_t>());
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ("company.num_employees", results["facet_counts"][0]["field_name"]);
+    ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
+    ASSERT_EQ(2, results["facet_counts"][0]["counts"][0]["count"].get<size_t>());
+    ASSERT_EQ("2000", results["facet_counts"][0]["counts"][0]["value"].get<std::string>());
+
+    // Nested wildcard faceting: `company.*` is expected to expand only to `company.num_employees`
+    std::vector<facet> wildcard_facets;
+    coll1->parse_facet("company.*", wildcard_facets);
+
+    ASSERT_EQ(1, wildcard_facets.size());
+    ASSERT_EQ("company.num_employees", wildcard_facets[0].field_name);
+    // Without the dot, `company*` is expected to match the top-level `companyRank` field as well.
+    wildcard_facets.clear();
+    coll1->parse_facet("company*", wildcard_facets);
+
+    ASSERT_EQ(2, wildcard_facets.size());
+    ASSERT_EQ("company.num_employees", wildcard_facets[0].field_name);
+    ASSERT_EQ("companyRank", wildcard_facets[1].field_name);
+}
+
+TEST_F(CollectionOptimizedFacetingTest, FacetParseTest){  // parse_facet() on range, plain, wildcard and mixed facet expressions
+    std::vector<field> fields = {
+            field("score", field_types::INT32, true),
+            field("grade", field_types::INT32, true),
+            field("rank", field_types::INT32, true),
+            field("range", field_types::INT32, true),
+            field("scale", field_types::INT32, false),  // the only non-facet field; wildcards must never return it
+    };
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();
+
+    std::vector<std::string> range_facet_fields {
+            "score(fail:[0, 40], pass:[40, 100])",
+            "grade(A:[80, 100], B:[60, 80], C:[40, 60])"
+    };
+    std::vector<facet> range_facets;
+    for(const std::string & facet_field: range_facet_fields) {
+        coll1->parse_facet(facet_field, range_facets);
+    }
+    ASSERT_EQ(2, range_facets.size());
+
+    ASSERT_STREQ("score", range_facets[0].field_name.c_str());
+    ASSERT_TRUE(range_facets[0].is_range_query);
+    ASSERT_GT(range_facets[0].facet_range_map.size(), 0);
+
+    ASSERT_STREQ("grade", range_facets[1].field_name.c_str());
+    ASSERT_TRUE(range_facets[1].is_range_query);
+    ASSERT_GT(range_facets[1].facet_range_map.size(), 0);
+
+    std::vector<std::string> normal_facet_fields {
+            "score",
+            "grade"
+    };
+    std::vector<facet> normal_facets;
+    for(const std::string & facet_field: normal_facet_fields) {
+        coll1->parse_facet(facet_field, normal_facets);
+    }
+    ASSERT_EQ(2, normal_facets.size());
+
+    ASSERT_STREQ("score", normal_facets[0].field_name.c_str());
+    ASSERT_STREQ("grade", normal_facets[1].field_name.c_str());
+
+    std::vector<std::string> wildcard_facet_fields {
+            "ran*",
+            "sc*",
+    };
+    std::vector<facet> wildcard_facets;
+    for(const std::string & facet_field: wildcard_facet_fields) {
+        coll1->parse_facet(facet_field, wildcard_facets);
+    }
+
+    ASSERT_EQ(3, wildcard_facets.size());  // "ran*" -> rank, range; "sc*" -> score only ("scale" is not a facet)
+
+    std::set<std::string> expected{"range", "rank", "score"};
+    for (const auto& parsed_facet : wildcard_facets) {
+        ASSERT_EQ(1, expected.count(parsed_facet.field_name)) << "unexpected facet: " << parsed_facet.field_name;
+    }
+
+    wildcard_facets.clear();
+    coll1->parse_facet("*", wildcard_facets);
+
+    // Last field is not a facet.
+    ASSERT_EQ(fields.size() - 1, wildcard_facets.size());
+
+    expected.clear();
+    for (size_t i = 0; i < fields.size() - 1; i++) {
+        expected.insert(fields[i].name);
+    }
+
+    for (const auto& parsed_facet : wildcard_facets) {
+        ASSERT_EQ(1, expected.count(parsed_facet.field_name)) << "unexpected facet: " << parsed_facet.field_name;
+    }
+
+    std::vector<std::string> mixed_facet_fields {
+            "score",
+            "grade(A:[80, 100], B:[60, 80], C:[40, 60])",
+            "ra*",
+    };
+
+    std::vector<facet> mixed_facets;
+    for(const std::string & facet_field: mixed_facet_fields) {
+        coll1->parse_facet(facet_field, mixed_facets);
+    }
+    ASSERT_EQ(4, mixed_facets.size());  // score + grade + the two "ra*" matches
+
+    std::vector<facet*> mixed_facets_ptr;
+    for(auto& f: mixed_facets) {
+        mixed_facets_ptr.push_back(&f);
+    }
+
+    std::sort(mixed_facets_ptr.begin(), mixed_facets_ptr.end(), [](const facet* f1, const facet* f2) {
+        return f1->field_name < f2->field_name;
+    });  // sort by name so the checks below do not depend on parse order: grade, range, rank, score
+
+    ASSERT_EQ("score", mixed_facets_ptr[3]->field_name);
+
+    ASSERT_EQ("grade", mixed_facets_ptr[0]->field_name);
+    ASSERT_TRUE(mixed_facets_ptr[0]->is_range_query);
+    ASSERT_GT(mixed_facets_ptr[0]->facet_range_map.size(), 0);
+
+    ASSERT_EQ("rank", mixed_facets_ptr[2]->field_name);
+    ASSERT_EQ("range", mixed_facets_ptr[1]->field_name);
+}
+
+TEST_F(CollectionOptimizedFacetingTest, RangeFacetTest) {  // range facets: bucket counts and rejection of invalid range expressions
+    std::vector<field> fields = {field("place", field_types::STRING, false),
+                                 field("state", field_types::STRING, false),
+                                 field("visitors", field_types::INT32, true),
+                                 field("trackingFrom", field_types::INT32, true),};
+    Collection* coll1 = collectionManager.create_collection(
+            "coll1", 1, fields, "", 0, "", {}, {}
+    ).get();
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["place"] = "Mysore Palace";
+    doc1["state"] = "Karnataka";
+    doc1["visitors"] = 235486;
+    doc1["trackingFrom"] = 1900;
+
+    nlohmann::json doc2;
+    doc2["id"] = "1";
+    doc2["place"] = "Hampi";
+    doc2["state"] = "Karnataka";
+    doc2["visitors"] = 187654;
+    doc2["trackingFrom"] = 1900;
+
+    nlohmann::json doc3;
+    doc3["id"] = "2";
+    doc3["place"] = "Mahabalipuram";
+    doc3["state"] = "TamilNadu";
+    doc3["visitors"] = 174684;
+    doc3["trackingFrom"] = 1900;
+
+    nlohmann::json doc4;
+    doc4["id"] = "3";
+    doc4["place"] = "Meenakshi Amman Temple";
+    doc4["state"] = "TamilNadu";
+    doc4["visitors"] = 246676;
+    doc4["trackingFrom"] = 2000;
+
+    nlohmann::json doc5;
+    doc5["id"] = "4";
+    doc5["place"] = "Staue of Unity";
+    doc5["state"] = "Gujarat";
+    doc5["visitors"] = 345878;
+    doc5["trackingFrom"] = 2000;
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc3.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc4.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc5.dump()).ok());
+
+    auto results = coll1->search("Karnataka", {"state"},
+                                 "", {"visitors(Busy:[0, 200000], VeryBusy:[200000, 500000])"},
+                                 {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000,
+                                 true, false, true, "", true,  6000000UL,
+                                 4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                 "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());  // Karnataka: 187654 -> Busy, 235486 -> VeryBusy
+    ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][0]["count"]);
+    ASSERT_EQ("Busy", results["facet_counts"][0]["counts"][0]["value"].get<std::string>());
+    ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][1]["count"]);
+    ASSERT_EQ("VeryBusy", results["facet_counts"][0]["counts"][1]["value"].get<std::string>());
+
+    auto results2 = coll1->search("Gujarat", {"state"},
+                                  "", {"visitors(Busy:[0, 200000], VeryBusy:[200000, 500000])"},
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000,
+                                  true, false, true, "", true,  6000000UL,
+                                 4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                 "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(1, results2["facet_counts"][0]["counts"].size());  // Gujarat: only 345878 -> VeryBusy, so no "Busy" bucket
+    ASSERT_EQ(1, results2["facet_counts"][0]["counts"][0]["count"].get<std::size_t>());
+    ASSERT_STREQ("VeryBusy", results2["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
+    // counts[1] is deliberately not probed: non-const json operator[] would null-fill the array, so such a check can never fail.
+
+    // ensure that an unknown facet field is handled
+
+    auto results3 = coll1->search("Gujarat", {"state"},
+                             "", {"visitorsz(Busy:[0, 200000], VeryBusy:[200000, 500000])"},
+                             {}, {2}, 10,
+                             1, FREQUENCY, {true},
+                             10, spp::sparse_hash_set<std::string>(),
+                             spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                             "<mark>", "</mark>", {}, 1000, true, false, true, "", true,  6000000UL, 4UL,
+                             7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false, "", true, 0UL,
+                             max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_FALSE(results3.ok());
+    ASSERT_EQ("Could not find a facet field named `visitorsz` in the schema.", results3.error());
+
+    auto results4 = coll1->search("*", {"state"},
+                                  "", {"trackingFrom(Old:[0, 1910], New:[1910, 2100])"},
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000, true, false, true, "", true,  6000000UL,
+                                  4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                  "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true).get();
+
+    ASSERT_EQ(2, results4["facet_counts"][0]["counts"].size());  // three docs track from 1900 (Old), two from 2000 (New)
+    ASSERT_EQ(3, results4["facet_counts"][0]["counts"][0]["count"].get<std::size_t>());
+    ASSERT_EQ("Old", results4["facet_counts"][0]["counts"][0]["value"].get<std::string>());
+
+    ASSERT_EQ(2, results4["facet_counts"][0]["counts"][1]["count"].get<std::size_t>());
+    ASSERT_EQ("New", results4["facet_counts"][0]["counts"][1]["value"].get<std::string>());
+
+    // ensure that only integer fields are allowed
+    auto rop = coll1->search("Karnataka", {"state"},
+                                 "", {"state(Busy:[0, 200000], VeryBusy:[200000, 500000])"},
+                                 {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000,
+                                 true, false, true, "", true,  6000000UL,
+                                 4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                 "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_FALSE(rop.ok());
+    ASSERT_EQ("Range facet is restricted to only int32 and int64 fields.", rop.error());
+
+    // ensure that bad facet range values are handled
+    rop = coll1->search("Karnataka", {"state"},
+                        "", {"visitors(Busy:[alpha, 200000], VeryBusy:[200000, beta])"},
+                        {}, {2}, 10,
+                        1, FREQUENCY, {true},
+                        10, spp::sparse_hash_set<std::string>(),
+                        spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                        "<mark>", "</mark>", {}, 1000,
+                        true, false, true, "", true,  6000000UL,
+                        4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                        "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_FALSE(rop.ok());
+    ASSERT_EQ("Facet range value is not valid.", rop.error());
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionOptimizedFacetingTest, RangeFacetContinuity) {  // adjacent facet ranges must share a boundary: gaps and overlaps are rejected
+    std::vector<field> fields = {field("place", field_types::STRING, false),
+                                 field("state", field_types::STRING, false),
+                                 field("visitors", field_types::INT32, true),};
+    Collection* coll1 = collectionManager.create_collection(
+            "coll1", 1, fields, "", 0, "", {}, {}
+    ).get();
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["place"] = "Mysore Palace";
+    doc1["state"] = "Karnataka";
+    doc1["visitors"] = 235486;
+
+    nlohmann::json doc2;
+    doc2["id"] = "1";
+    doc2["place"] = "Hampi";
+    doc2["state"] = "Karnataka";
+    doc2["visitors"] = 187654;
+
+    nlohmann::json doc3;
+    doc3["id"] = "2";
+    doc3["place"] = "Mahabalipuram";
+    doc3["state"] = "TamilNadu";
+    doc3["visitors"] = 174684;
+
+    nlohmann::json doc4;
+    doc4["id"] = "3";
+    doc4["place"] = "Meenakshi Amman Temple";
+    doc4["state"] = "TamilNadu";
+    doc4["visitors"] = 246676;
+
+    nlohmann::json doc5;
+    doc5["id"] = "4";
+    doc5["place"] = "Staue of Unity";
+    doc5["state"] = "Gujarat";
+    doc5["visitors"] = 345878;
+
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc3.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc4.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc5.dump()).ok());
+
+    auto results = coll1->search("TamilNadu", {"state"},
+                                 "", {"visitors(Busy:[0, 200000], VeryBusy:[200001, 500000])"}, // gap: first range ends at 200000, second starts at 200001
+                                 {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000,
+                                 true, false, true, "", true,  6000000UL, 
+                                 4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                 "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);  // no .get(): the Option is inspected for its error
+
+    ASSERT_STREQ("Ranges in range facet syntax should be continous.", results.error().c_str());
+
+    auto results2 = coll1->search("TamilNadu", {"state"},
+                                  "", {"visitors(Busy:[0, 200000], VeryBusy:[199999, 500000])"}, // overlap: second range starts at 199999, before the first ends
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000,
+                                  true, false, true, "", true,  6000000UL, 
+                                 4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                 "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_STREQ("Ranges in range facet syntax should be continous.", results2.error().c_str());
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionOptimizedFacetingTest, RangeFacetTypo) {  // malformed range-facet expressions must yield an error, one of two messages depending on the typo
+    std::vector<field> fields = {field("place", field_types::STRING, false),
+                                 field("state", field_types::STRING, false),
+                                 field("visitors", field_types::INT32, true),};
+    Collection* coll1 = collectionManager.create_collection(
+            "coll1", 1, fields, "", 0, "", {}, {}
+    ).get();
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["place"] = "Mysore Palace";
+    doc1["state"] = "Karnataka";
+    doc1["visitors"] = 235486;
+
+    nlohmann::json doc2;
+    doc2["id"] = "1";
+    doc2["place"] = "Hampi";
+    doc2["state"] = "Karnataka";
+    doc2["visitors"] = 187654;
+
+    nlohmann::json doc3;
+    doc3["id"] = "2";
+    doc3["place"] = "Mahabalipuram";
+    doc3["state"] = "TamilNadu";
+    doc3["visitors"] = 174684;
+
+    nlohmann::json doc4;
+    doc4["id"] = "3";
+    doc4["place"] = "Meenakshi Amman Temple";
+    doc4["state"] = "TamilNadu";
+    doc4["visitors"] = 246676;
+
+    nlohmann::json doc5;
+    doc5["id"] = "4";
+    doc5["place"] = "Staue of Unity";
+    doc5["state"] = "Gujarat";
+    doc5["visitors"] = 345878;
+
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc3.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc4.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc5.dump()).ok());
+
+    auto results = coll1->search("TamilNadu", {"state"},
+                                 "", {"visitors(Busy:[0, 200000], VeryBusy:[200000, 500000)"}, // missing ']' at the end of the second range
+                                 {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000,
+                                 true, false, true, "", true,  6000000UL, 
+                                 4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                 "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_STREQ("Error splitting the facet range values.", results.error().c_str());
+
+    auto results2 = coll1->search("TamilNadu", {"state"},
+                                  "", {"visitors(Busy:[0, 200000], VeryBusy:200000, 500000])"}, // missing '[' in the second range
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000,
+                                  true, false, true, "", true,  6000000UL, 
+                                 4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                 "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_STREQ("Error splitting the facet range values.", results2.error().c_str());
+
+    auto results3 = coll1->search("TamilNadu", {"state"},
+                                  "", {"visitors(Busy:[0, 200000] VeryBusy:[200000, 500000])"}, // missing ',' between the two ranges
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000,
+                                  true, false, true, "", true,  6000000UL, 
+                                 4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                 "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_STREQ("Error splitting the facet range values.", results3.error().c_str());
+
+    auto results4 = coll1->search("TamilNadu", {"state"},
+                                  "", {"visitors(Busy:[0 200000], VeryBusy:[200000, 500000])"}, // missing ',' between the first range's two values
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000,
+                                  true, false, true, "", true,  6000000UL, 
+                                 4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                 "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_STREQ("Facet range value is not valid.", results4.error().c_str());  // this typo is reported as a bad value, not a split error
+
+    auto results5 = coll1->search("TamilNadu", {"state"},
+                                  "", {"visitors(Busy:[0, 200000 VeryBusy:200000, 500000])"}, // missing both the "]," after the first range and the '[' of the second
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000,
+                                  true, false, true, "", true,  6000000UL, 
+                                 4UL, 7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                 "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, true);
+
+    ASSERT_STREQ("Facet range value is not valid.", results5.error().c_str());
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionOptimizedFacetingTest, SampleFacetCounts) {  // facet_sample_percent / facet_sample_threshold: sampled vs. exact facet counts
+    nlohmann::json schema = R"({
+            "name": "coll1",
+            "fields": [
+                {"name": "color", "type": "string", "facet": true}
+            ]
+        })"_json;
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    std::mt19937 gen(137723); // use constant seed to make sure that counts don't jump around
+    std::uniform_int_distribution<> distr(1, 100); // 1 to 100 inclusive
+
+    size_t count_blue = 0, count_red = 0;  // exact per-color totals, compared against the non-sampled search below
+
+    for(size_t i = 0; i < 1000; i++) {
+        nlohmann::json doc;
+        if(distr(gen) % 2 == 0) {  // even draw -> blue, odd draw -> red
+            doc["color"] = "blue";
+            count_blue++;
+        } else {
+            doc["color"] = "red";
+            count_red++;
+        }
+
+        ASSERT_TRUE(coll1->add(doc.dump()).ok());
+    }
+
+    auto res = coll1->search("*", {}, "", {"color"}, {}, {0}, 3, 1, FREQUENCY, {true}, 5,
+                             spp::sparse_hash_set<std::string>(),
+                             spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
+                             "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                             4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 10, 0, 4294967295UL, true).get();  // after max_score: facet_sample_percent = 10, facet_sample_threshold = 0
+
+    ASSERT_EQ(1000, res["found"].get<size_t>());
+    ASSERT_EQ(1, res["facet_counts"].size());
+    ASSERT_EQ(2, res["facet_counts"][0]["counts"].size());
+
+    // verify approximate counts: with sampling active only a lower bound per color is asserted
+    ASSERT_GE(res["facet_counts"][0]["counts"][0]["count"].get<size_t>(), 250);
+    ASSERT_GE(res["facet_counts"][0]["counts"][1]["count"].get<size_t>(), 250);
+    ASSERT_TRUE(res["facet_counts"][0]["sampled"].get<bool>());
+
+    // when sample threshold is high (10000 here, against 1000 docs found), don't estimate
+    res = coll1->search("*", {}, "", {"color"}, {}, {0}, 3, 1, FREQUENCY, {true}, 5,
+                        spp::sparse_hash_set<std::string>(),
+                        spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
+                        "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                        4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 10, 10000, 4294967295UL, true).get();
+
+    ASSERT_EQ(1000, res["found"].get<size_t>());
+    ASSERT_EQ(1, res["facet_counts"].size());
+    ASSERT_EQ(2, res["facet_counts"][0]["counts"].size());
+
+    for(size_t i = 0; i < res["facet_counts"][0]["counts"].size(); i++) {  // bucket order is not assumed; match on the value instead
+        if(res["facet_counts"][0]["counts"][i]["value"].get<std::string>() == "red") {
+            ASSERT_EQ(count_red, res["facet_counts"][0]["counts"][i]["count"].get<size_t>());
+        } else {
+            ASSERT_EQ(count_blue, res["facet_counts"][0]["counts"][i]["count"].get<size_t>());
+        }
+    }
+
+    ASSERT_FALSE(res["facet_counts"][0]["sampled"].get<bool>());
+
+    // test for sample percent > 100 (200 is passed): the search must be rejected
+
+    auto res_op = coll1->search("*", {}, "", {"color"}, {}, {0}, 3, 1, FREQUENCY, {true}, 5,
+                                spp::sparse_hash_set<std::string>(),
+                                spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
+                                "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                                4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 200, 0, 4294967295UL, true);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("Value of `facet_sample_percent` must be less than 100.", res_op.error());
+}
+
+TEST_F(CollectionOptimizedFacetingTest, FacetOnArrayFieldWithSpecialChars) {  // a string-array facet value made only of symbols and spaces gets its own bucket
+    std::vector<field> fields = {
+            field("tags", field_types::STRING_ARRAY, true),
+            field("points", field_types::INT32, true),
+    };
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();
+
+    nlohmann::json doc;
+    doc["tags"] = {"gamma"};
+    doc["points"] = 10;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    doc["tags"] = {"alpha", "| . |", "beta", "gamma"};  // "gamma" now occurs in both docs; "| . |" is the special-character value
+    doc["points"] = 10;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    auto results = coll1->search("*", {},
+                                 "", {"tags"}, {}, {2}, 10, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
+                                spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
+                                "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                                4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, true).get();
+
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ(4, results["facet_counts"][0]["counts"].size());  // alpha, "| . |", beta, gamma
+
+    for(size_t i = 0; i < results["facet_counts"][0]["counts"].size(); i++) {  // order-independent check of each bucket's count
+        auto fvalue = results["facet_counts"][0]["counts"][i]["value"].get<std::string>();
+        if(fvalue == "gamma") {
+            ASSERT_EQ(2, results["facet_counts"][0]["counts"][i]["count"].get<size_t>());
+        } else {
+            ASSERT_EQ(1, results["facet_counts"][0]["counts"][i]["count"].get<size_t>());
+        }
+    }
+}
+
+TEST_F(CollectionOptimizedFacetingTest, StringLengthTest) {  // a facet value longer than 100 characters is reported truncated to 100
+    std::vector<field> fields = {
+            field("tags", field_types::STRING_ARRAY, true),
+            field("points", field_types::INT32, true),
+    };
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();
+
+    nlohmann::json doc;
+    doc["tags"] = {"gamma"};
+    doc["points"] = 10;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    doc["tags"] = {"beta"};
+    doc["points"] = 10;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    doc["tags"] = {"alpha"};
+    doc["points"] = 10;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    std::string longStr = "";
+    for(auto i = 0; i < 8; ++i) {
+        longStr+="alphabetagamma";  // 14 characters per repetition
+    }
+
+    ASSERT_EQ(112, longStr.size());  // 8 * 14: comfortably above the 100-character limit checked below
+
+    std::vector<std::string> vec;
+    vec.emplace_back(longStr);
+    doc["tags"] = vec;
+    doc["points"] = 10;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    auto results = coll1->search("*", {},
+                                 "", {"tags"}, {}, {2}, 10, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
+                                spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
+                                "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                                4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, true).get();
+
+
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ(4, results["facet_counts"][0]["counts"].size());
+
+    longStr = results["facet_counts"][0]["counts"][3]["value"].get<std::string>();  // the test expects the long value in the last bucket
+
+    //string facet length is restricted to 100
+    ASSERT_EQ(100, longStr.size());
+}
\ No newline at end of file