Return vector distance when sorting by a vector query.

This commit is contained in:
Kishore Nallan 2024-07-31 13:43:53 +05:30
parent fc153ae192
commit 05b0faa955
2 changed files with 46 additions and 0 deletions

View File

@ -2826,6 +2826,9 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
reference_lat_lng, sort_field.unit);
} else if(sort_field.geopoint != 0) {
geo_distances[sort_field.name] = std::abs(field_order_kv->scores[sort_field_index]);
} else if(sort_field.name == sort_field_const::vector_query &&
!sort_field.vector_query.query.field_name.empty()) {
wrapper_doc["vector_distance"] = Index::int64_t_to_float(field_order_kv->scores[sort_field_index]);
}
}

View File

@ -1272,6 +1272,49 @@ TEST_F(CollectionVectorTest, EmbeddOptionalFieldNullValueUpsert) {
ASSERT_EQ("Field `tags` must be an array of string.", add_op.error());
}
// Checks that a keyword search sorted by `_vector_query(embedding:([]))` on an
// auto-embedded field returns a numeric `vector_distance` on the first hit.
TEST_F(CollectionVectorTest, SortKeywordSearchWithAutoEmbedVector) {
    nlohmann::json schema = R"({
        "name": "coll1",
        "fields": [
            {"name": "title", "type": "string"},
            {"name": "points", "type": "int32"},
            {"name": "embedding", "type":"float[]", "embed":{"from": ["title"],
                "model_config": {"model_name": "ts/e5-small"}}}
        ]
    })"_json;

    EmbedderManager::set_model_dir("/tmp/typesense_test/models");

    // Fail with a readable message instead of dereferencing an invalid collection pointer.
    auto create_op = collectionManager.create_collection(schema);
    ASSERT_TRUE(create_op.ok()) << create_op.error();
    Collection* coll1 = create_op.get();

    nlohmann::json doc;
    doc["id"] = "0";
    doc["title"] = "The Lord of the Rings";
    doc["points"] = 100;

    auto add_op = coll1->add(doc.dump());
    ASSERT_TRUE(add_op.ok());

    std::vector<sort_by> sort_by_list = {sort_by("_vector_query(embedding:([]))", "asc")};

    // Single definition of the search so that both runs use identical arguments.
    auto run_search = [&]() {
        return coll1->search("lord", {"title"}, "", {}, sort_by_list, {0}, 10, 1, FREQUENCY, {true},
                             Index::DROP_TOKENS_THRESHOLD,
                             spp::sparse_hash_set<std::string>(),
                             spp::sparse_hash_set<std::string>());
    };

    // The original test issued the same search twice; that is preserved here.
    // NOTE(review): presumably the second run covers a repeated query -- confirm intent.
    for(size_t run = 0; run < 2; run++) {
        auto search_op = run_search();
        ASSERT_TRUE(search_op.ok()) << search_op.error();
        auto results = search_op.get();

        // Guard the accesses below so that a missing hit or field is reported as a
        // test failure rather than as a JSON type exception.
        ASSERT_FALSE(results["hits"].empty());
        ASSERT_EQ(1, results["hits"][0].count("vector_distance"));
        ASSERT_TRUE(results["hits"][0]["vector_distance"].is_number());

        LOG(INFO) << results["hits"][0]["vector_distance"].get<float>();
    }

    // NOTE(review): the exact-value checks below were disabled in the original test;
    // enable them once the expected values are confirmed.
    //ASSERT_EQ(1, results["found"].get<size_t>());
    //ASSERT_EQ(1.0f, results["hits"][0]["vector_distance"].get<float>());
}
TEST_F(CollectionVectorTest, HybridSearchWithExplicitVector) {
nlohmann::json schema = R"({
"name": "objects",