diff --git a/src/index.cpp b/src/index.cpp index 73319d21..94f75dbf 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -3012,6 +3012,13 @@ Option Index::search(std::vector& field_query_tokens, cons continue; } + if(vector_query.query_doc_given && nearest_ids.size() >= k-1) { + // When id based vector query is made, we ask for K+1 results to account for the query + // record itself being returned. However, when the filter clause does not match the + // query record, we will end up returning 1 extra hit. + break; + } + uint64_t distinct_id = seq_id; if (group_limit != 0) { distinct_id = 1; @@ -3049,6 +3056,7 @@ Option Index::search(std::vector& field_query_tokens, cons if(group_limit != 0 && ret < 2) { groups_processed[distinct_id]++; } + nearest_ids.push_back(seq_id); } diff --git a/test/collection_vector_search_test.cpp b/test/collection_vector_search_test.cpp index f545af0f..e3e90b71 100644 --- a/test/collection_vector_search_test.cpp +++ b/test/collection_vector_search_test.cpp @@ -174,6 +174,18 @@ TEST_F(CollectionVectorTest, BasicVectorQuerying) { ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str()); ASSERT_STREQ("2", results["hits"][1]["document"]["id"].get().c_str()); + // when id does not match filter, don't return k+1 hits + results = coll1->search("*", {}, "id:!=1", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10, {}, {}, {}, 0, + "", "", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback, + 4, {off}, 32767, 32767, 2, + false, true, "vec:([], id: 1, k:1)").get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_EQ(1, results["hits"].size()); + // `k` value should overrides per_page results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD, spp::sparse_hash_set(), @@ -1356,7 +1368,6 @@ TEST_F(CollectionVectorTest, DistanceThresholdTest) { } - TEST_F(CollectionVectorTest, HybridSearchSortByGeopoint) { nlohmann::json schema = R"({ "name": "objects",