For facet query, let max_hits default to 0.

2025-05-17 20:22:32 +08:00 · 2020-02-21 07:47:38 +05:30 · 2020-02-21 07:47:38 +05:30 · 654811f4a3
commit 654811f4a3
parent 5de270f4ae
4 changed files with 24 additions and 24 deletions
--- a/include/index.h
+++ b/include/index.h
@ -148,8 +148,7 @@ private:
    void search_field(const uint8_t & field_id, std::string & query,
                      const std::string & field, uint32_t *filter_ids, size_t filter_ids_length,
                      std::vector<facet> & facets, const std::vector<sort_by> & sort_fields,
-                      const int num_typos, const size_t num_results,
-                      std::vector<std::vector<art_leaf*>> & searched_queries,
+                      const int num_typos, std::vector<std::vector<art_leaf*>> & searched_queries,
                      Topster & topster, uint32_t** all_result_ids,
                      size_t & all_result_ids_len, const token_ordering token_order = FREQUENCY,
                      const bool prefix = false, const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD);
--- a/src/collection.cpp
+++ b/src/collection.cpp
@ -456,7 +456,7 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::

    // parse facet query
    std::vector<std::string> facet_query_vec;
-    facet_query_t facet_query;
+    facet_query_t facet_query = {"", ""};

    if(!simple_facet_query.empty() && simple_facet_query.find(':') == std::string::npos) {
        std::string error = "Facet query must be in the `facet_field: value` format.";
@ -503,7 +503,8 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
        return Option<nlohmann::json>(422, message);
    }

-    const size_t num_results = (page * per_page);
+    const size_t results_per_page = std::min(per_page, max_hits);
+    const size_t num_results = (page * results_per_page);

    if(num_results > max_hits) {
        std::string message = "Only the first " + std::to_string(max_hits) + " results are available.";
@ -522,7 +523,7 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
        index->search_params = search_args(query, search_fields, filters, facets,
                                           index_to_included_ids[index_id], index_to_excluded_ids[index_id],
                                           sort_fields_std, facet_query, num_typos, max_facet_values, max_hits,
-                                           per_page, page, token_order, prefix, drop_tokens_threshold);
+                                           results_per_page, page, token_order, prefix, drop_tokens_threshold);
        {
            std::lock_guard<std::mutex> lk(index->m);
            index->ready = true;
@ -607,13 +608,6 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
    result["hits"] = nlohmann::json::array();
    result["found"] = total_found;

-    const int start_result_index = (page - 1) * per_page;
-    const int kvsize = raw_result_kvs.size() + override_result_kvs.size();
-
-    if(start_result_index > (kvsize - 1)) {
-        return Option<nlohmann::json>(result);
-    }
-
    std::vector<KV> result_kvs;
    size_t override_kv_index = 0;
    size_t raw_results_index = 0;
@ -641,10 +635,11 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
        raw_results_index++;
    }

-    size_t end_result_index = std::min(num_results, result_kvs.size()) - 1;
+    const long start_result_index = (page - 1) * results_per_page;
+    const long end_result_index = std::min(num_results, result_kvs.size()) - 1;  // could be -1 when max_hits is 0

    // construct results array
-    for(size_t result_kvs_index = start_result_index; result_kvs_index <= end_result_index; result_kvs_index++) {
+    for(long result_kvs_index = start_result_index; result_kvs_index <= end_result_index; result_kvs_index++) {
        const auto & field_order_kv = result_kvs[result_kvs_index];
        const std::string& seq_id_key = get_seq_id_key((uint32_t) field_order_kv.key);

--- a/src/core_api.cpp
+++ b/src/core_api.cpp
@ -209,8 +209,17 @@ void get_search(http_req & req, http_res & res) {
        req.params[MAX_FACET_VALUES] = "10";
    }

+    if(req.params.count(FACET_QUERY) == 0) {
+        req.params[FACET_QUERY] = "";
+    }
+
    if(req.params.count(MAX_HITS) == 0) {
-        req.params[MAX_HITS] = "500";
+        // for facet query, let max hits be 0 if it is not explicitly set
+        if(req.params[FACET_QUERY].empty()) {
+            req.params[MAX_HITS] = "500";
+        } else {
+            req.params[MAX_HITS] = "0";
+        }
    }

    if(req.params.count(PER_PAGE) == 0) {
@ -253,10 +262,6 @@ void get_search(http_req & req, http_res & res) {
    std::vector<std::string> facet_fields;
    StringUtils::split(req.params[FACET_BY], facet_fields, ",");

-    if(req.params.count(FACET_QUERY) == 0) {
-        req.params[FACET_QUERY] = "";
-    }
-
    std::vector<std::string> include_fields_vec;
    StringUtils::split(req.params[INCLUDE_FIELDS], include_fields_vec, ",");

--- a/src/index.cpp
+++ b/src/index.cpp
@ -973,8 +973,9 @@ void Index::search(Option<uint32_t> & outcome,
    //auto begin = std::chrono::high_resolution_clock::now();
    uint32_t* all_result_ids = nullptr;

-    Topster topster(max_hits);
-    Topster curated_topster(max_hits);
+    const size_t topster_size = std::max((size_t)1, max_hits);  // needs to be at least 1 since scoring is mandatory
+    Topster topster(topster_size);
+    Topster curated_topster(topster_size);

    if(query == "*") {
        const uint8_t field_id = (uint8_t)(FIELD_LIMIT_NUM - 0);
@ -994,7 +995,7 @@ void Index::search(Option<uint32_t> & outcome,
                const std::string & field = search_fields[i];

                search_field(field_id, query, field, filter_ids, filter_ids_length, facets, sort_fields_std,
-                             num_typos, num_results, searched_queries, topster, &all_result_ids, all_result_ids_len,
+                             num_typos, searched_queries, topster, &all_result_ids, all_result_ids_len,
                             token_order, prefix, drop_tokens_threshold);
                collate_curated_ids(query, field, field_id, included_ids, curated_topster, searched_queries);
            }
@ -1056,7 +1057,7 @@ void Index::search(Option<uint32_t> & outcome,
 void Index::search_field(const uint8_t & field_id, std::string & query, const std::string & field,
                         uint32_t *filter_ids, size_t filter_ids_length,
                         std::vector<facet> & facets, const std::vector<sort_by> & sort_fields, const int num_typos,
-                         const size_t num_results, std::vector<std::vector<art_leaf*>> & searched_queries,
+                         std::vector<std::vector<art_leaf*>> & searched_queries,
                         Topster & topster, uint32_t** all_result_ids, size_t & all_result_ids_len,
                         const token_ordering token_order, const bool prefix, const size_t drop_tokens_threshold) {
    std::vector<std::string> tokens;
@ -1204,7 +1205,7 @@ void Index::search_field(const uint8_t & field_id, std::string & query, const st
        }

        return search_field(field_id, truncated_query, field, filter_ids, filter_ids_length, facets, sort_fields, num_typos,
-                            num_results, searched_queries, topster, all_result_ids, all_result_ids_len,
+                            searched_queries, topster, all_result_ids, all_result_ids_len,
                            token_order, prefix);
    }
 }