Add a `max_facet_values` search parameter for controlling the maximum number of values returned per facet during search.

This commit is contained in:
Kishore Nallan 2019-06-26 18:57:01 +05:30
parent aa439c1ea6
commit e6dad1682c
5 changed files with 45 additions and 15 deletions

View File

@ -128,7 +128,8 @@ public:
const token_ordering token_order = FREQUENCY, const bool prefix = false,
const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD,
const spp::sparse_hash_set<std::string> include_fields = spp::sparse_hash_set<std::string>(),
const spp::sparse_hash_set<std::string> exclude_fields = spp::sparse_hash_set<std::string>());
const spp::sparse_hash_set<std::string> exclude_fields = spp::sparse_hash_set<std::string>(),
const size_t max_facet_values=10);
Option<nlohmann::json> get(const std::string & id);

View File

@ -28,6 +28,7 @@ struct search_args {
std::vector<facet> facets;
std::vector<sort_by> sort_fields_std;
int num_typos;
size_t max_facet_values;
size_t per_page;
size_t page;
token_ordering token_order;
@ -43,11 +44,11 @@ struct search_args {
}
search_args(std::string query, std::vector<std::string> search_fields, std::vector<filter> filters,
std::vector<facet> facets, std::vector<sort_by> sort_fields_std, int num_typos,
std::vector<facet> facets, std::vector<sort_by> sort_fields_std, int num_typos, size_t max_facet_values,
size_t per_page, size_t page, token_ordering token_order, bool prefix, size_t drop_tokens_threshold):
query(query), search_fields(search_fields), filters(filters), facets(facets),
sort_fields_std(sort_fields_std), num_typos(num_typos), per_page(per_page), page(page),
token_order(token_order), prefix(prefix), drop_tokens_threshold(drop_tokens_threshold),
sort_fields_std(sort_fields_std), num_typos(num_typos), max_facet_values(max_facet_values), per_page(per_page),
page(page), token_order(token_order), prefix(prefix), drop_tokens_threshold(drop_tokens_threshold),
all_result_ids_len(0), outcome(0) {
}

View File

@ -280,7 +280,8 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
const token_ordering token_order, const bool prefix,
const size_t drop_tokens_threshold,
const spp::sparse_hash_set<std::string> include_fields,
const spp::sparse_hash_set<std::string> exclude_fields) {
const spp::sparse_hash_set<std::string> exclude_fields,
const size_t max_facet_values) {
std::vector<facet> facets;
// validate search fields
@ -447,8 +448,6 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
return Option<nlohmann::json>(422, message);
}
//auto begin = std::chrono::high_resolution_clock::now();
// all search queries that were used for generating the results
std::vector<std::vector<art_leaf*>> searched_queries;
std::vector<Topster<512>::KV> field_order_kvs;
@ -457,14 +456,14 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
// send data to individual index threads
for(Index* index: indices) {
index->search_params = search_args(query, search_fields, filters, facets, sort_fields_std,
num_typos, per_page, page, token_order, prefix, drop_tokens_threshold);
num_typos, max_facet_values, per_page, page, token_order, prefix,
drop_tokens_threshold);
{
std::lock_guard<std::mutex> lk(index->m);
index->ready = true;
index->processed = false;
}
index->cv.notify_one();
//std::this_thread::sleep_for(std::chrono::milliseconds(400));
}
Option<nlohmann::json> index_search_op({}); // stores the last error across all index threads
@ -616,7 +615,7 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
return a.second > b.second;
});
for(size_t i = 0; i < std::min((size_t)100, value_to_count.size()); i++) {
for(size_t i = 0; i < std::min(max_facet_values, value_to_count.size()); i++) {
auto & kv = value_to_count[i];
nlohmann::json facet_value_count = nlohmann::json::object();
facet_value_count["value"] = kv.first;

View File

@ -172,7 +172,10 @@ void get_search(http_req & req, http_res & res) {
const char *QUERY = "q";
const char *QUERY_BY = "query_by";
const char *SORT_BY = "sort_by";
const char *FACET_BY = "facet_by";
const char *MAX_FACET_VALUES = "max_facet_values";
const char *PER_PAGE = "per_page";
const char *PAGE = "page";
const char *CALLBACK = "callback";
@ -200,6 +203,10 @@ void get_search(http_req & req, http_res & res) {
return res.send_400(std::string("Parameter `") + QUERY_BY + "` is required.");
}
if(req.params.count(MAX_FACET_VALUES) == 0) {
req.params[MAX_FACET_VALUES] = "10";
}
if(req.params.count(PER_PAGE) == 0) {
req.params[PER_PAGE] = "10";
}
@ -289,10 +296,12 @@ void get_search(http_req & req, http_res & res) {
token_ordering token_order = (req.params[RANK_TOKENS_BY] == "DEFAULT_SORTING_FIELD") ? MAX_SCORE : FREQUENCY;
Option<nlohmann::json> result_op = collection->search(req.params[QUERY], search_fields, filter_str, facet_fields,
sort_fields, std::stoi(req.params[NUM_TYPOS]),
std::stoi(req.params[PER_PAGE]), std::stoi(req.params[PAGE]),
token_order, prefix, drop_tokens_threshold,
include_fields, exclude_fields);
sort_fields, std::stoi(req.params[NUM_TYPOS]),
static_cast<size_t>(std::stoi(req.params[PER_PAGE])),
static_cast<size_t>(std::stoi(req.params[PAGE])),
token_order, prefix, drop_tokens_threshold,
include_fields, exclude_fields,
static_cast<size_t>(std::stoi(req.params[MAX_FACET_VALUES])));
uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - begin).count();

View File

@ -1560,6 +1560,7 @@ TEST_F(CollectionTest, FacetCounts) {
ASSERT_EQ(1, results["facet_counts"].size());
ASSERT_EQ(2, results["facet_counts"][0].size());
ASSERT_EQ("tags", results["facet_counts"][0]["field_name"]);
ASSERT_EQ(4, results["facet_counts"][0]["counts"].size());
ASSERT_EQ("gold", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][0]["count"]);
@ -1570,6 +1571,26 @@ TEST_F(CollectionTest, FacetCounts) {
ASSERT_EQ("bronze", results["facet_counts"][0]["counts"][2]["value"]);
ASSERT_EQ(2, (int) results["facet_counts"][0]["counts"][2]["count"]);
ASSERT_EQ("FINE PLATINUM", results["facet_counts"][0]["counts"][3]["value"]);
ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][3]["count"]);
// facet with facet count limit
results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields, 0, 10, 1,
FREQUENCY, false, 10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 2).get();
ASSERT_EQ(5, results["hits"].size());
ASSERT_EQ(1, results["facet_counts"].size());
ASSERT_EQ(2, results["facet_counts"][0].size());
ASSERT_EQ("tags", results["facet_counts"][0]["field_name"]);
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
ASSERT_EQ("gold", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][0]["count"]);
ASSERT_EQ("silver", results["facet_counts"][0]["counts"][1]["value"]);
ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][1]["count"]);
// 2 facets, 1 text filter with no filters
facets.clear();
facets.push_back("tags");
@ -1624,7 +1645,6 @@ TEST_F(CollectionTest, FacetCounts) {
ASSERT_EQ("bronze", results["facet_counts"][0]["counts"][2]["value"]);
ASSERT_EQ("gold", results["facet_counts"][0]["counts"][3]["value"]);
collectionManager.drop_collection("coll_array_fields");
}