mirror of
https://github.com/typesense/typesense.git
synced 2025-05-26 00:36:22 +08:00
Fix vector query by id returning k+1 hits.
This commit is contained in:
parent
15114a6c87
commit
ec02d9fe9a
@ -3012,6 +3012,13 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
continue;
|
||||
}
|
||||
|
||||
if(vector_query.query_doc_given && nearest_ids.size() >= k-1) {
|
||||
// When id based vector query is made, we ask for K+1 results to account for the query
|
||||
// record itself being returned. However, when the filter clause does not match the
|
||||
// query record, we will end up returning 1 extra hit.
|
||||
break;
|
||||
}
|
||||
|
||||
uint64_t distinct_id = seq_id;
|
||||
if (group_limit != 0) {
|
||||
distinct_id = 1;
|
||||
@ -3049,6 +3056,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
if(group_limit != 0 && ret < 2) {
|
||||
groups_processed[distinct_id]++;
|
||||
}
|
||||
|
||||
nearest_ids.push_back(seq_id);
|
||||
}
|
||||
|
||||
|
@ -174,6 +174,18 @@ TEST_F(CollectionVectorTest, BasicVectorQuerying) {
|
||||
ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("2", results["hits"][1]["document"]["id"].get<std::string>().c_str());
|
||||
|
||||
// when id does not match filter, don't return k+1 hits
|
||||
results = coll1->search("*", {}, "id:!=1", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
|
||||
"", 10, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
|
||||
4, {off}, 32767, 32767, 2,
|
||||
false, true, "vec:([], id: 1, k:1)").get();
|
||||
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
// `k` value should override per_page
|
||||
results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
@ -1356,7 +1368,6 @@ TEST_F(CollectionVectorTest, DistanceThresholdTest) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
TEST_F(CollectionVectorTest, HybridSearchSortByGeopoint) {
|
||||
nlohmann::json schema = R"({
|
||||
"name": "objects",
|
||||
|
Loading…
x
Reference in New Issue
Block a user