mirror of
https://github.com/typesense/typesense.git
synced 2025-05-20 05:32:30 +08:00
Fix bug with offset/pagination affecting vector search.
This commit is contained in:
parent
2cffc015e7
commit
a7adc1c3ca
@ -463,7 +463,7 @@ public:
|
||||
const text_match_type_t match_type = max_score,
|
||||
const size_t facet_sample_percent = 100,
|
||||
const size_t facet_sample_threshold = 0,
|
||||
const size_t page_offset = UINT32_MAX,
|
||||
const size_t page_offset = 0,
|
||||
facet_index_type_t facet_index_type = HASH,
|
||||
const size_t vector_query_hits = 250) const;
|
||||
|
||||
|
@ -152,7 +152,7 @@ struct search_args {
|
||||
filter_node_t* filter_tree_root, std::vector<facet>& facets,
|
||||
std::vector<std::pair<uint32_t, uint32_t>>& included_ids, std::vector<uint32_t> excluded_ids,
|
||||
std::vector<sort_by>& sort_fields_std, facet_query_t facet_query, const std::vector<uint32_t>& num_typos,
|
||||
size_t max_facet_values, size_t max_hits, size_t per_page, size_t page, token_ordering token_order,
|
||||
size_t max_facet_values, size_t max_hits, size_t per_page, size_t offset, token_ordering token_order,
|
||||
const std::vector<bool>& prefixes, size_t drop_tokens_threshold, size_t typo_tokens_threshold,
|
||||
const std::vector<std::string>& group_by_fields, size_t group_limit,
|
||||
const string& default_sorting_field, bool prioritize_exact_match,
|
||||
|
@ -1393,7 +1393,17 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
|
||||
return Option<nlohmann::json>(422, message);
|
||||
}
|
||||
|
||||
size_t offset = (page != 0) ? (per_page * (page - 1)) : page_offset;
|
||||
size_t offset = 0;
|
||||
|
||||
if(page == 0 && page_offset != 0) {
|
||||
// if only offset is set, use that
|
||||
offset = page_offset;
|
||||
} else {
|
||||
// if both are set or none set, use page value (default is 1)
|
||||
size_t actual_page = (page == 0) ? 1 : page;
|
||||
offset = (per_page * (actual_page - 1));
|
||||
}
|
||||
|
||||
size_t fetch_size = offset + per_page;
|
||||
|
||||
if(fetch_size > limit_hits) {
|
||||
|
@ -783,7 +783,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
std::vector<sort_by> sort_fields;
|
||||
size_t per_page = 10;
|
||||
size_t page = 0;
|
||||
size_t offset = UINT32_MAX;
|
||||
size_t offset = 0;
|
||||
token_ordering token_order = NOT_SET;
|
||||
|
||||
std::string vector_query;
|
||||
@ -978,14 +978,6 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
per_page = 0;
|
||||
}
|
||||
|
||||
if(!req_params[PAGE].empty() && page == 0 && offset == UINT32_MAX) {
|
||||
return Option<bool>(422, "Parameter `page` must be an integer of value greater than 0.");
|
||||
}
|
||||
|
||||
if(req_params[PAGE].empty() && req_params[OFFSET].empty()) {
|
||||
page = 1;
|
||||
}
|
||||
|
||||
include_fields.insert(include_fields_vec.begin(), include_fields_vec.end());
|
||||
exclude_fields.insert(exclude_fields_vec.begin(), exclude_fields_vec.end());
|
||||
|
||||
@ -1097,10 +1089,10 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
result["search_time_ms"] = timeMillis;
|
||||
}
|
||||
|
||||
if(page != 0) {
|
||||
result["page"] = page;
|
||||
} else {
|
||||
if(page == 0 && offset != 0) {
|
||||
result["offset"] = offset;
|
||||
} else {
|
||||
result["page"] = (page == 0) ? 1 : page;
|
||||
}
|
||||
|
||||
results_json_str = result.dump(-1, ' ', false, nlohmann::detail::error_handler_t::ignore);
|
||||
|
@ -2397,6 +2397,10 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
(filter_id_count >= vector_query.flat_search_cutoff && filter_result_iterator->is_valid)) {
|
||||
dist_labels.clear();
|
||||
|
||||
if(no_filters_provided) {
|
||||
filter_result_iterator->approx_filter_ids_length = 0;
|
||||
}
|
||||
|
||||
VectorFilterFunctor filterFunctor(filter_result_iterator);
|
||||
|
||||
if(field_vector_index->distance_type == cosine) {
|
||||
@ -2407,6 +2411,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
dist_labels = field_vector_index->vecdex->searchKnnCloserFirst(vector_query.values.data(), k, &filterFunctor);
|
||||
}
|
||||
}
|
||||
|
||||
filter_result_iterator->reset();
|
||||
|
||||
std::vector<uint32_t> nearest_ids;
|
||||
@ -2657,6 +2662,10 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
constexpr float TEXT_MATCH_WEIGHT = 0.7;
|
||||
constexpr float VECTOR_SEARCH_WEIGHT = 1.0 - TEXT_MATCH_WEIGHT;
|
||||
|
||||
if(no_filters_provided) {
|
||||
filter_result_iterator->approx_filter_ids_length = 0;
|
||||
}
|
||||
|
||||
VectorFilterFunctor filterFunctor(filter_result_iterator);
|
||||
auto& field_vector_index = vector_index.at(vector_query.field_name);
|
||||
std::vector<std::pair<float, size_t>> dist_labels;
|
||||
|
@ -1055,11 +1055,11 @@ TEST_F(CollectionTest, KeywordQueryReturnsResultsBasedOnPerPageParam) {
|
||||
ASSERT_EQ(422, res_op.code());
|
||||
ASSERT_STREQ("Only upto 250 hits can be fetched per page.", res_op.error().c_str());
|
||||
|
||||
// when page number is not valid
|
||||
res_op = coll_mul_fields->search("w", query_fields, "", facets, sort_fields, {0}, 10, 0,
|
||||
FREQUENCY, {true}, 1000, empty, empty, 10);
|
||||
ASSERT_FALSE(res_op.ok());
|
||||
ASSERT_EQ(422, res_op.code());
|
||||
// when page number is zero, use the first page
|
||||
results = coll_mul_fields->search("w", query_fields, "", facets, sort_fields, {0}, 3, 0,
|
||||
FREQUENCY, {true}, 1000, empty, empty, 10).get();
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
ASSERT_EQ(6, results["found"].get<int>());
|
||||
|
||||
// do pagination
|
||||
|
||||
@ -3027,11 +3027,11 @@ TEST_F(CollectionTest, WildcardQueryReturnsResultsBasedOnPerPageParam) {
|
||||
ASSERT_EQ(422, res_op.code());
|
||||
ASSERT_STREQ("Only upto 250 hits can be fetched per page.", res_op.error().c_str());
|
||||
|
||||
// when page number is not valid
|
||||
res_op = collection->search("*", query_fields, "", facets, sort_fields, {0}, 10, 0,
|
||||
FREQUENCY, {false}, 1000, empty, empty, 10);
|
||||
ASSERT_FALSE(res_op.ok());
|
||||
ASSERT_EQ(422, res_op.code());
|
||||
// when page number is 0, just fetch first page
|
||||
results = collection->search("*", query_fields, "", facets, sort_fields, {0}, 10, 0,
|
||||
FREQUENCY, {false}, 1000, empty, empty, 10).get();
|
||||
ASSERT_EQ(10, results["hits"].size());
|
||||
ASSERT_EQ(25, results["found"].get<int>());
|
||||
|
||||
// do pagination
|
||||
|
||||
|
@ -768,7 +768,7 @@ TEST_F(CoreAPIUtilsTest, SearchPagination) {
|
||||
ASSERT_EQ(400, results["code"].get<size_t>());
|
||||
ASSERT_EQ("Parameter `offset` must be an unsigned integer.", results["error"].get<std::string>());
|
||||
|
||||
// when page is 0 and no offset is sent
|
||||
// when page is 0 and offset is NOT sent, we will treat as page=1
|
||||
search.clear();
|
||||
req->params.clear();
|
||||
body["searches"] = nlohmann::json::array();
|
||||
@ -782,8 +782,29 @@ TEST_F(CoreAPIUtilsTest, SearchPagination) {
|
||||
|
||||
post_multi_search(req, res);
|
||||
results = nlohmann::json::parse(res->body)["results"][0];
|
||||
ASSERT_EQ(422, results["code"].get<size_t>());
|
||||
ASSERT_EQ("Parameter `page` must be an integer of value greater than 0.", results["error"].get<std::string>());
|
||||
ASSERT_EQ(10, results["hits"].size());
|
||||
ASSERT_EQ(1, results["page"].get<size_t>());
|
||||
ASSERT_EQ(0, results.count("offset"));
|
||||
|
||||
// when both page and offset are sent, use page
|
||||
search.clear();
|
||||
req->params.clear();
|
||||
body["searches"] = nlohmann::json::array();
|
||||
search["collection"] = "coll1";
|
||||
search["q"] = "title";
|
||||
search["page"] = "2";
|
||||
search["offset"] = "30";
|
||||
search["query_by"] = "name";
|
||||
search["sort_by"] = "points:desc";
|
||||
body["searches"].push_back(search);
|
||||
req->body = body.dump();
|
||||
|
||||
post_multi_search(req, res);
|
||||
results = nlohmann::json::parse(res->body)["results"][0];
|
||||
ASSERT_EQ(10, results["hits"].size());
|
||||
ASSERT_EQ(2, results["page"].get<size_t>());
|
||||
ASSERT_EQ(0, results.count("offset"));
|
||||
|
||||
}
|
||||
|
||||
TEST_F(CoreAPIUtilsTest, ExportWithFilter) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user