From e6dad1682c4012f58998d02c3574d4c90f565b78 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Wed, 26 Jun 2019 18:57:01 +0530 Subject: [PATCH] Parameter for controlling max number of facets returned during search. --- include/collection.h | 3 ++- include/index.h | 7 ++++--- src/collection.cpp | 11 +++++------ src/core_api.cpp | 17 +++++++++++++---- test/collection_test.cpp | 22 +++++++++++++++++++++- 5 files changed, 45 insertions(+), 15 deletions(-) diff --git a/include/collection.h b/include/collection.h index 0552e2af..9dae85ae 100644 --- a/include/collection.h +++ b/include/collection.h @@ -128,7 +128,8 @@ public: const token_ordering token_order = FREQUENCY, const bool prefix = false, const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD, const spp::sparse_hash_set include_fields = spp::sparse_hash_set(), - const spp::sparse_hash_set exclude_fields = spp::sparse_hash_set()); + const spp::sparse_hash_set exclude_fields = spp::sparse_hash_set(), + const size_t max_facet_values=10); Option get(const std::string & id); diff --git a/include/index.h b/include/index.h index 2999f703..c527f969 100644 --- a/include/index.h +++ b/include/index.h @@ -28,6 +28,7 @@ struct search_args { std::vector facets; std::vector sort_fields_std; int num_typos; + size_t max_facet_values; size_t per_page; size_t page; token_ordering token_order; @@ -43,11 +44,11 @@ struct search_args { } search_args(std::string query, std::vector search_fields, std::vector filters, - std::vector facets, std::vector sort_fields_std, int num_typos, + std::vector facets, std::vector sort_fields_std, int num_typos, size_t max_facet_values, size_t per_page, size_t page, token_ordering token_order, bool prefix, size_t drop_tokens_threshold): query(query), search_fields(search_fields), filters(filters), facets(facets), - sort_fields_std(sort_fields_std), num_typos(num_typos), per_page(per_page), page(page), - token_order(token_order), prefix(prefix), drop_tokens_threshold(drop_tokens_threshold), + sort_fields_std(sort_fields_std), num_typos(num_typos), max_facet_values(max_facet_values), per_page(per_page), + page(page), token_order(token_order), prefix(prefix), drop_tokens_threshold(drop_tokens_threshold), all_result_ids_len(0), outcome(0) { } diff --git a/src/collection.cpp b/src/collection.cpp index 710aa64c..e230bfcd 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -280,7 +280,8 @@ Option Collection::search(std::string query, const std::vector include_fields, - const spp::sparse_hash_set exclude_fields) { + const spp::sparse_hash_set exclude_fields, + const size_t max_facet_values) { std::vector facets; // validate search fields @@ -447,8 +448,6 @@ Option Collection::search(std::string query, const std::vector(422, message); } - //auto begin = std::chrono::high_resolution_clock::now(); - // all search queries that were used for generating the results std::vector> searched_queries; std::vector::KV> field_order_kvs; @@ -457,14 +456,14 @@ Option Collection::search(std::string query, const std::vectorsearch_params = search_args(query, search_fields, filters, facets, sort_fields_std, - num_typos, per_page, page, token_order, prefix, drop_tokens_threshold); + num_typos, max_facet_values, per_page, page, token_order, prefix, + drop_tokens_threshold); { std::lock_guard lk(index->m); index->ready = true; index->processed = false; } index->cv.notify_one(); - //std::this_thread::sleep_for(std::chrono::milliseconds(400)); } Option index_search_op({}); // stores the last error across all index threads @@ -616,7 +615,7 @@ Option Collection::search(std::string query, const std::vector b.second; }); - for(size_t i = 0; i < std::min((size_t)100, value_to_count.size()); i++) { + for(size_t i = 0; i < std::min(max_facet_values, value_to_count.size()); i++) { auto & kv = value_to_count[i]; nlohmann::json facet_value_count = nlohmann::json::object(); facet_value_count["value"] = kv.first; diff --git a/src/core_api.cpp b/src/core_api.cpp index 2e61b29f..66f1f290 100644 --- a/src/core_api.cpp +++ b/src/core_api.cpp @@ -172,7 +172,10 @@ void get_search(http_req & req, http_res & res) { const char *QUERY = "q"; const char *QUERY_BY = "query_by"; const char *SORT_BY = "sort_by"; + const char *FACET_BY = "facet_by"; + const char *MAX_FACET_VALUES = "max_facet_values"; + const char *PER_PAGE = "per_page"; const char *PAGE = "page"; const char *CALLBACK = "callback"; @@ -200,6 +203,10 @@ void get_search(http_req & req, http_res & res) { return res.send_400(std::string("Parameter `") + QUERY_BY + "` is required."); } + if(req.params.count(MAX_FACET_VALUES) == 0) { + req.params[MAX_FACET_VALUES] = "10"; + } + if(req.params.count(PER_PAGE) == 0) { req.params[PER_PAGE] = "10"; } @@ -289,10 +296,12 @@ void get_search(http_req & req, http_res & res) { token_ordering token_order = (req.params[RANK_TOKENS_BY] == "DEFAULT_SORTING_FIELD") ? MAX_SCORE : FREQUENCY; Option result_op = collection->search(req.params[QUERY], search_fields, filter_str, facet_fields, - sort_fields, std::stoi(req.params[NUM_TYPOS]), - std::stoi(req.params[PER_PAGE]), std::stoi(req.params[PAGE]), - token_order, prefix, drop_tokens_threshold, - include_fields, exclude_fields); + sort_fields, std::stoi(req.params[NUM_TYPOS]), + static_cast(std::stoi(req.params[PER_PAGE])), + static_cast(std::stoi(req.params[PAGE])), + token_order, prefix, drop_tokens_threshold, + include_fields, exclude_fields, + static_cast(std::stoi(req.params[MAX_FACET_VALUES]))); uint64_t timeMillis = std::chrono::duration_cast( std::chrono::high_resolution_clock::now() - begin).count(); diff --git a/test/collection_test.cpp b/test/collection_test.cpp index 12c46574..7729ce3d 100644 --- a/test/collection_test.cpp +++ b/test/collection_test.cpp @@ -1560,6 +1560,7 @@ TEST_F(CollectionTest, FacetCounts) { ASSERT_EQ(1, results["facet_counts"].size()); ASSERT_EQ(2, results["facet_counts"][0].size()); ASSERT_EQ("tags", results["facet_counts"][0]["field_name"]); + ASSERT_EQ(4, results["facet_counts"][0]["counts"].size()); ASSERT_EQ("gold", results["facet_counts"][0]["counts"][0]["value"]); ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][0]["count"]); @@ -1570,6 +1571,26 @@ TEST_F(CollectionTest, FacetCounts) { ASSERT_EQ("bronze", results["facet_counts"][0]["counts"][2]["value"]); ASSERT_EQ(2, (int) results["facet_counts"][0]["counts"][2]["count"]); + ASSERT_EQ("FINE PLATINUM", results["facet_counts"][0]["counts"][3]["value"]); + ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][3]["count"]); + + // facet with facet count limit + results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields, 0, 10, 1, + FREQUENCY, false, 10, spp::sparse_hash_set(), + spp::sparse_hash_set(), 2).get(); + ASSERT_EQ(5, results["hits"].size()); + + ASSERT_EQ(1, results["facet_counts"].size()); + ASSERT_EQ(2, results["facet_counts"][0].size()); + ASSERT_EQ("tags", results["facet_counts"][0]["field_name"]); + ASSERT_EQ(2, results["facet_counts"][0]["counts"].size()); + + ASSERT_EQ("gold", results["facet_counts"][0]["counts"][0]["value"]); + ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][0]["count"]); + + ASSERT_EQ("silver", results["facet_counts"][0]["counts"][1]["value"]); + ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][1]["count"]); + // 2 facets, 1 text filter with no filters facets.clear(); facets.push_back("tags"); @@ -1624,7 +1645,6 @@ TEST_F(CollectionTest, FacetCounts) { ASSERT_EQ("bronze", results["facet_counts"][0]["counts"][2]["value"]); ASSERT_EQ("gold", results["facet_counts"][0]["counts"][3]["value"]); - collectionManager.drop_collection("coll_array_fields"); }