mirror of
https://github.com/typesense/typesense.git
synced 2025-05-20 21:52:23 +08:00
Vector flat search: handle missing values.
This commit is contained in:
parent
790fac008b
commit
ae49dab8fd
@ -604,7 +604,7 @@ struct sort_by {
|
||||
struct vector_query_t {
|
||||
std::string field_name;
|
||||
size_t k = 0;
|
||||
size_t flat_search_cutoff = 1000;
|
||||
size_t flat_search_cutoff = 0;
|
||||
std::vector<float> values;
|
||||
|
||||
void _reset() {
|
||||
|
@ -2515,9 +2515,16 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
if(!no_filters_provided && filter_ids_length < vector_query.flat_search_cutoff) {
|
||||
for(size_t i = 0; i < filter_ids_length; i++) {
|
||||
auto seq_id = filter_ids[i];
|
||||
const std::vector<float>& values = field_vector_index->vecdex->getDataByLabel<float>(seq_id);
|
||||
float dist;
|
||||
std::vector<float> values;
|
||||
|
||||
try {
|
||||
values = field_vector_index->vecdex->getDataByLabel<float>(seq_id);
|
||||
} catch(...) {
|
||||
// likely not found
|
||||
continue;
|
||||
}
|
||||
|
||||
float dist;
|
||||
if(field_vector_index->distance_type == cosine) {
|
||||
std::vector<float> normalized_q(vector_query.values.size());
|
||||
hnsw_index_t::normalize_vector(vector_query.values, normalized_q);
|
||||
@ -2565,6 +2572,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
}
|
||||
|
||||
if(!nearest_ids.empty()) {
|
||||
std::sort(nearest_ids.begin(), nearest_ids.end()); // seq_ids should be in ascending order
|
||||
all_result_ids = new uint32[nearest_ids.size()];
|
||||
std::copy(nearest_ids.begin(), nearest_ids.end(), all_result_ids);
|
||||
all_result_ids_len = nearest_ids.size();
|
||||
|
@ -319,6 +319,107 @@ TEST_F(CollectionVectorTest, VecSearchWithFiltering) {
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
}
|
||||
|
||||
// Verifies that vector search skips documents that have no value for an optional
// vector field, both with and without a filter, and for both values of
// `flat_search_cutoff` exercised below (0 and 1000).
TEST_F(CollectionVectorTest, VecSearchWithFilteringWithMissingVectorValues) {
    // `vec` is declared optional, so documents are allowed to omit it.
    nlohmann::json schema = R"({
        "name": "coll1",
        "fields": [
            {"name": "title", "type": "string"},
            {"name": "points", "type": "int32"},
            {"name": "vec", "type": "float[]", "num_dim": 4, "optional": true}
        ]
    })"_json;

    Collection* coll1 = collectionManager.create_collection(schema).get();

    std::mt19937 rng;
    rng.seed(47);
    std::uniform_real_distribution<> distrib;

    const size_t num_docs = 20;
    const size_t num_dims = 4;

    for(size_t doc_idx = 0; doc_idx < num_docs; ++doc_idx) {
        nlohmann::json doc;
        doc["id"] = std::to_string(doc_idx);
        doc["title"] = std::to_string(doc_idx) + " title";
        doc["points"] = doc_idx;

        // The random values are drawn for every document, including the two that
        // end up without a vector, so the generator sequence is not affected.
        std::vector<float> values;
        for(size_t dim = 0; dim < num_dims; ++dim) {
            values.push_back(distrib(rng));
        }

        // Documents 5 and 15 are indexed without the `vec` field.
        const bool omit_vector = (doc_idx == 5 || doc_idx == 15);
        if(!omit_vector) {
            doc["vec"] = values;
        }

        ASSERT_TRUE(coll1->add(doc.dump()).ok());
    }

    // All searches below differ only in the filter and the vector query string.
    auto run_search = [coll1](const std::string& filter_query, const std::string& vector_query) {
        return coll1->search("*", {}, filter_query, {}, {}, {0}, 20, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
                             spp::sparse_hash_set<std::string>(),
                             spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
                             "", 10, {}, {}, {}, 0,
                             "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7,
                             fallback,
                             4, {off}, 32767, 32767, 2,
                             false, true, vector_query).get();
    };

    // no filter: only the 18 documents that carry a vector are returned
    auto results = run_search("", "vec:([0.96826, 0.94, 0.39557, 0.306488])");

    ASSERT_EQ(18, results["found"].get<size_t>());
    ASSERT_EQ(18, results["hits"].size());

    // with points:<10, non-flat-search
    results = run_search("points:<10", "vec:([0.96826, 0.94, 0.39557, 0.306488], flat_search_cutoff: 0)");

    ASSERT_EQ(9, results["found"].get<size_t>());
    ASSERT_EQ(9, results["hits"].size());

    // with points:<10, flat-search
    results = run_search("points:<10", "vec:([0.96826, 0.94, 0.39557, 0.306488], flat_search_cutoff: 1000)");

    ASSERT_EQ(9, results["found"].get<size_t>());
    ASSERT_EQ(9, results["hits"].size());

    // single point
    results = run_search("points:1", "vec:([0.96826, 0.94, 0.39557, 0.306488], flat_search_cutoff: 0)");

    ASSERT_EQ(1, results["found"].get<size_t>());
    ASSERT_EQ(1, results["hits"].size());

    results = run_search("points:1", "vec:([0.96826, 0.94, 0.39557, 0.306488], flat_search_cutoff: 1000)");

    ASSERT_EQ(1, results["found"].get<size_t>());
    ASSERT_EQ(1, results["hits"].size());
}
|
||||
|
||||
TEST_F(CollectionVectorTest, VectorSearchTestDeletion) {
|
||||
nlohmann::json schema = R"({
|
||||
"name": "coll1",
|
||||
|
Loading…
x
Reference in New Issue
Block a user