Mirror of https://github.com/typesense/typesense.git (synced 2025-05-18 04:32:38 +08:00)
Allow maximum hits returned to be configurable.
Raising the limit does have a performance impact, but for most deployments it should be acceptable, so the trade-off is now left to the user's discretion. The default of 500 results stays the same to maintain backward compatibility.
parent b899b86a96
commit fd285b6fbe
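The hunks below touch the Collection and Index headers and sources, the Topster implementation, the HTTP search handler, and the Topster tests. The key structural change is that Topster's capacity moves from a compile-time template parameter (Topster<512>) to a constructor argument, so the per-request max_hits value can size the result heap at query time. The following sketch only illustrates that idea under simplified assumptions (plain integer scores instead of the real KV entries, no key deduplication); it is not the actual Topster code:

// Minimal sketch (assumed, simplified): a top-K collector whose capacity is
// chosen at runtime, mirroring the move away from the compile-time
// Topster<512> template parameter. A min-heap keeps only the best
// `capacity` scores seen so far.
#include <cstdint>
#include <cstdio>
#include <functional>
#include <queue>
#include <vector>

struct TopKCollector {
    size_t capacity;  // analogous to max_hits, decided per request
    // min-heap: the weakest retained score sits on top and is evicted first
    std::priority_queue<int64_t, std::vector<int64_t>, std::greater<int64_t>> heap;

    explicit TopKCollector(size_t capacity): capacity(capacity) {}

    void add(int64_t score) {
        if(heap.size() < capacity) {
            heap.push(score);
        } else if(score > heap.top()) {
            heap.pop();      // drop the current weakest result
            heap.push(score);
        }
    }
};

int main() {
    TopKCollector collector(3);  // capacity = 3, set at runtime
    for(int64_t score: {10, 42, 7, 99, 23}) {
        collector.add(score);
    }
    while(!collector.heap.empty()) {  // prints 23, 42, 99 (weakest first)
        std::printf("%lld\n", (long long) collector.heap.top());
        collector.heap.pop();
    }
    return 0;
}

On the HTTP side, the handler defaults the new max_hits query parameter to "500", so existing clients keep the old ceiling unless they explicitly ask for more.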
@@ -211,7 +211,7 @@ public:
 const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD,
 const spp::sparse_hash_set<std::string> include_fields = spp::sparse_hash_set<std::string>(),
 const spp::sparse_hash_set<std::string> exclude_fields = spp::sparse_hash_set<std::string>(),
-const size_t max_facet_values=10);
+const size_t max_facet_values=10, size_t max_hits=512);

 Option<nlohmann::json> get(const std::string & id);

@@ -234,11 +234,10 @@ public:
 void par_index_in_memory(std::vector<std::vector<index_record>> & iter_batch,
 batch_index_result & result);

-static void prune_document(nlohmann::json &document, const spp::sparse_hash_set<std::string> include_fields,
-const spp::sparse_hash_set<std::string> exclude_fields);
+static void prune_document(nlohmann::json &document, const spp::sparse_hash_set<std::string> & include_fields,
+const spp::sparse_hash_set<std::string> & exclude_fields);

 static const int MAX_SEARCH_TOKENS = 10;
-static const int MAX_RESULTS = 500;

 // strings under this length will be fully highlighted, instead of showing a snippet of relevant portion
 enum {SNIPPET_STR_ABOVE_LEN = 30};
@@ -31,6 +31,7 @@ struct search_args {
 std::vector<sort_by> sort_fields_std;
 int num_typos;
 size_t max_facet_values;
+size_t max_hits;
 size_t per_page;
 size_t page;
 token_ordering token_order;
@@ -49,10 +50,11 @@ struct search_args {
 search_args(std::string query, std::vector<std::string> search_fields, std::vector<filter> filters,
 std::vector<facet> facets, std::vector<uint32_t> included_ids, std::vector<uint32_t> excluded_ids,
 std::vector<sort_by> sort_fields_std, int num_typos, size_t max_facet_values,
-size_t per_page, size_t page, token_ordering token_order, bool prefix, size_t drop_tokens_threshold):
+size_t max_hits, size_t per_page, size_t page, token_ordering token_order, bool prefix,
+size_t drop_tokens_threshold):
 query(query), search_fields(search_fields), filters(filters), facets(facets), included_ids(included_ids),
 excluded_ids(excluded_ids), sort_fields_std(sort_fields_std), num_typos(num_typos),
-max_facet_values(max_facet_values), per_page(per_page),
+max_facet_values(max_facet_values), max_hits(max_hits), per_page(per_page),
 page(page), token_order(token_order), prefix(prefix), drop_tokens_threshold(drop_tokens_threshold),
 all_result_ids_len(0), outcome(0) {

@@ -146,15 +148,15 @@ private:
 std::vector<facet> & facets, const std::vector<sort_by> & sort_fields,
 const int num_typos, const size_t num_results,
 std::vector<std::vector<art_leaf*>> & searched_queries,
-Topster<512> & topster, uint32_t** all_result_ids,
+Topster & topster, uint32_t** all_result_ids,
 size_t & all_result_ids_len, const token_ordering token_order = FREQUENCY,
 const bool prefix = false, const size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD);

 void search_candidates(const uint8_t & field_id, uint32_t* filter_ids, size_t filter_ids_length, std::vector<facet> & facets,
 const std::vector<sort_by> & sort_fields, std::vector<token_candidates> & token_to_candidates,
 const token_ordering token_order, std::vector<std::vector<art_leaf*>> & searched_queries,
-Topster<512> & topster, uint32_t** all_result_ids,
-size_t & all_result_ids_len, const size_t & max_results, const bool prefix);
+Topster & topster, uint32_t** all_result_ids,
+size_t & all_result_ids_len, const size_t & max_results);

 void insert_doc(const uint32_t score, art_tree *t, uint32_t seq_id,
 const std::unordered_map<std::string, std::vector<uint32_t>> &token_to_offsets) const;
@@ -186,7 +188,7 @@ private:

 void collate_curated_ids(const std::string & query, const std::string & field, const uint8_t field_id,
 const std::vector<uint32_t> & included_ids,
-Topster<512> & curated_topster, std::vector<std::vector<art_leaf*>> & searched_queries);
+Topster & curated_topster, std::vector<std::vector<art_leaf*>> & searched_queries);

 public:
 Index() = delete;
@@ -202,7 +204,7 @@ public:
 const std::vector<filter> & filters, std::vector<facet> & facets,
 const std::vector<uint32_t> & included_ids, const std::vector<uint32_t> & excluded_ids,
 const std::vector<sort_by> & sort_fields_std, const int num_typos,
-const size_t per_page, const size_t page, const token_ordering token_order,
+const size_t max_hits, const size_t per_page, const size_t page, const token_ordering token_order,
 const bool prefix, const size_t drop_tokens_threshold, std::vector<KV> & raw_result_kvs,
 size_t & all_result_ids_len, std::vector<std::vector<art_leaf*>> & searched_queries,
 std::vector<KV> & override_result_kvs);
@@ -217,7 +219,7 @@ public:
 std::vector<std::vector<std::vector<uint16_t>>> &array_token_positions);

 void score_results(const std::vector<sort_by> & sort_fields, const uint16_t & query_index, const uint8_t & field_id,
-const uint32_t total_cost, Topster<512> &topster, const std::vector<art_leaf *> & query_suggestion,
+const uint32_t total_cost, Topster &topster, const std::vector<art_leaf *> & query_suggestion,
 const uint32_t *result_ids, const size_t result_size) const;

 static int32_t get_points_from_doc(const nlohmann::json &document, const std::string & default_sorting_field);
@@ -21,19 +21,20 @@ struct KV {
 /*
 * Remembers the max-K elements seen so far using a min-heap
 */
-template <size_t MAX_SIZE=512>
 struct Topster {
+const uint32_t MAX_SIZE;
 KV *data;
 uint32_t size;

 spp::sparse_hash_map<uint64_t, KV*> keys;

-KV *kvs[MAX_SIZE];
+KV* *kvs;

-Topster(): size(0){
-data = new KV[MAX_SIZE];
+explicit Topster(size_t capacity): MAX_SIZE(capacity), size(0) {
+kvs = new KV*[capacity];
+data = new KV[capacity];

-for(size_t i=0; i<MAX_SIZE; i++) {
+for(size_t i=0; i<capacity; i++) {
 data[i].field_id = 0;
 data[i].query_index = 0;
 data[i].key = 0;
@@ -185,7 +186,7 @@ struct Topster {

 // topster must be sorted before iterated upon to remove dead array entries
 void sort() {
-std::stable_sort(std::begin(kvs), std::begin(kvs) + size, is_greater_kv);
+std::stable_sort(kvs, kvs+size, is_greater_kv);
 }

 void clear(){
@@ -264,8 +264,8 @@ void Collection::par_index_in_memory(std::vector<std::vector<index_record>> & it
 }
 }

-void Collection::prune_document(nlohmann::json &document, const spp::sparse_hash_set<std::string> include_fields,
-const spp::sparse_hash_set<std::string> exclude_fields) {
+void Collection::prune_document(nlohmann::json &document, const spp::sparse_hash_set<std::string>& include_fields,
+const spp::sparse_hash_set<std::string>& exclude_fields) {
 auto it = document.begin();
 for(; it != document.end(); ) {
 if(exclude_fields.count(it.key()) != 0 || (include_fields.size() != 0 && include_fields.count(it.key()) == 0)) {
@@ -311,7 +311,7 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
 const size_t drop_tokens_threshold,
 const spp::sparse_hash_set<std::string> include_fields,
 const spp::sparse_hash_set<std::string> exclude_fields,
-const size_t max_facet_values) {
+const size_t max_facet_values, const size_t max_hits) {

 std::vector<uint32_t> included_ids;
 std::vector<uint32_t> excluded_ids;
@@ -492,8 +492,8 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s

 const size_t num_results = (page * per_page);

-if(num_results > MAX_RESULTS) {
-std::string message = "Only the first " + std::to_string(MAX_RESULTS) + " results are available.";
+if(num_results > max_hits) {
+std::string message = "Only the first " + std::to_string(max_hits) + " results are available.";
 return Option<nlohmann::json>(422, message);
 }

@@ -508,8 +508,8 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
 for(Index* index: indices) {
 index->search_params = search_args(query, search_fields, filters, facets,
 index_to_included_ids[index_id], index_to_excluded_ids[index_id],
-sort_fields_std, num_typos, max_facet_values, per_page, page,
-token_order, prefix, drop_tokens_threshold);
+sort_fields_std, num_typos, max_facet_values, max_hits,
+per_page, page, token_order, prefix, drop_tokens_threshold);
 {
 std::lock_guard<std::mutex> lk(index->m);
 index->ready = true;
@@ -577,7 +577,7 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
 }

 // All fields are sorted descending
-std::sort(raw_result_kvs.begin(), raw_result_kvs.end(), Topster<>::is_greater_kv_value);
+std::sort(raw_result_kvs.begin(), raw_result_kvs.end(), Topster::is_greater_kv_value);

 // Sort based on position in overriden list
 std::sort(
@@ -176,6 +176,7 @@ void get_search(http_req & req, http_res & res) {
 const char *FACET_BY = "facet_by";
 const char *MAX_FACET_VALUES = "max_facet_values";

+const char *MAX_HITS = "max_hits";
 const char *PER_PAGE = "per_page";
 const char *PAGE = "page";
 const char *CALLBACK = "callback";
@@ -207,6 +208,10 @@ void get_search(http_req & req, http_res & res) {
 req.params[MAX_FACET_VALUES] = "10";
 }

+if(req.params.count(MAX_HITS) == 0) {
+req.params[MAX_HITS] = "500";
+}
+
 if(req.params.count(PER_PAGE) == 0) {
 req.params[PER_PAGE] = "10";
 }
@@ -301,7 +306,8 @@ void get_search(http_req & req, http_res & res) {
 static_cast<size_t>(std::stoi(req.params[PAGE])),
 token_order, prefix, drop_tokens_threshold,
 include_fields, exclude_fields,
-static_cast<size_t>(std::stoi(req.params[MAX_FACET_VALUES])));
+static_cast<size_t>(std::stoi(req.params[MAX_FACET_VALUES])),
+static_cast<size_t>(std::stoi(req.params[MAX_HITS])));

 uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
 std::chrono::high_resolution_clock::now() - begin).count();
@@ -559,9 +559,9 @@ void Index::drop_facets(std::vector<facet> & facets, const std::vector<uint32_t>
 void Index::search_candidates(const uint8_t & field_id, uint32_t* filter_ids, size_t filter_ids_length, std::vector<facet> & facets,
 const std::vector<sort_by> & sort_fields,
 std::vector<token_candidates> & token_candidates_vec, const token_ordering token_order,
-std::vector<std::vector<art_leaf*>> & searched_queries, Topster<512> & topster,
+std::vector<std::vector<art_leaf*>> & searched_queries, Topster & topster,
 uint32_t** all_result_ids, size_t & all_result_ids_len,
-const size_t & max_results, const bool prefix) {
+const size_t & max_results) {
 const long long combination_limit = 10;

 auto product = []( long long a, token_candidates & b ) { return a*b.candidates.size(); };
@@ -795,7 +795,7 @@ void Index::run_search() {
 search(search_params.outcome, search_params.query, search_params.search_fields,
 search_params.filters, search_params.facets, search_params.included_ids,
 search_params.excluded_ids, search_params.sort_fields_std, search_params.num_typos,
-search_params.per_page, search_params.page, search_params.token_order,
+search_params.max_hits, search_params.per_page, search_params.page, search_params.token_order,
 search_params.prefix, search_params.drop_tokens_threshold, search_params.raw_result_kvs,
 search_params.all_result_ids_len, search_params.searched_queries, search_params.override_result_kvs);

@@ -811,7 +811,7 @@ void Index::run_search() {

 void Index::collate_curated_ids(const std::string & query, const std::string & field, const uint8_t field_id,
 const std::vector<uint32_t> & included_ids,
-Topster<512> & curated_topster,
+Topster & curated_topster,
 std::vector<std::vector<art_leaf*>> & searched_queries) {

 if(included_ids.size() == 0) {
@@ -873,11 +873,13 @@ void Index::collate_curated_ids(const std::string & query, const std::string & f
 }
 }

-void Index::search(Option<uint32_t> & outcome, std::string query, const std::vector<std::string> & search_fields,
+void Index::search(Option<uint32_t> & outcome,
+std::string query,
+const std::vector<std::string> & search_fields,
 const std::vector<filter> & filters, std::vector<facet> & facets,
 const std::vector<uint32_t> & included_ids,
 const std::vector<uint32_t> & excluded_ids,
-const std::vector<sort_by> & sort_fields_std, const int num_typos,
+const std::vector<sort_by> & sort_fields_std, const int num_typos, const size_t max_hits,
 const size_t per_page, const size_t page, const token_ordering token_order,
 const bool prefix, const size_t drop_tokens_threshold,
 std::vector<KV> & raw_result_kvs,
@@ -902,8 +904,8 @@ void Index::search(Option<uint32_t> & outcome, std::string query, const std::vec
 //auto begin = std::chrono::high_resolution_clock::now();
 uint32_t* all_result_ids = nullptr;

-Topster<512> topster;
-Topster<512> curated_topster;
+Topster topster(max_hits);
+Topster curated_topster(max_hits);

 if(query == "*") {
 const uint8_t field_id = (uint8_t)(FIELD_LIMIT_NUM - 0);
@@ -986,7 +988,7 @@ void Index::search_field(const uint8_t & field_id, std::string & query, const st
 uint32_t *filter_ids, size_t filter_ids_length,
 std::vector<facet> & facets, const std::vector<sort_by> & sort_fields, const int num_typos,
 const size_t num_results, std::vector<std::vector<art_leaf*>> & searched_queries,
-Topster<512> &topster, uint32_t** all_result_ids, size_t & all_result_ids_len,
+Topster & topster, uint32_t** all_result_ids, size_t & all_result_ids_len,
 const token_ordering token_order, const bool prefix, const size_t drop_tokens_threshold) {
 std::vector<std::string> tokens;
 StringUtils::split(query, tokens, " ");
@@ -1103,7 +1105,7 @@ void Index::search_field(const uint8_t & field_id, std::string & query, const st
 // If all tokens were found, go ahead and search for candidates with what we have so far
 search_candidates(field_id, filter_ids, filter_ids_length, facets, sort_fields, token_candidates_vec,
 token_order, searched_queries, topster, all_result_ids, all_result_ids_len,
-Index::SEARCH_LIMIT_NUM, prefix);
+Index::SEARCH_LIMIT_NUM);

 if (all_result_ids_len >= Index::SEARCH_LIMIT_NUM) {
 // If we don't find enough results, we continue outerloop (looking at tokens with greater cost)
@@ -1154,7 +1156,7 @@ void Index::log_leaves(const int cost, const std::string &token, const std::vect
 }

 void Index::score_results(const std::vector<sort_by> & sort_fields, const uint16_t & query_index, const uint8_t & field_id,
-const uint32_t total_cost, Topster<512> & topster,
+const uint32_t total_cost, Topster & topster,
 const std::vector<art_leaf *> &query_suggestion,
 const uint32_t *result_ids, const size_t result_size) const {

@@ -3,7 +3,7 @@
 #include "match_score.h"

 TEST(TopsterTest, MaxIntValues) {
-Topster<5> topster;
+Topster topster(5);

 struct {
 uint8_t field_id;
@@ -52,7 +52,7 @@ TEST(TopsterTest, MaxIntValues) {
 }

 TEST(TopsterTest, MaxFloatValues) {
-Topster<5> topster;
+Topster topster(5);

 struct {
 uint8_t field_id;