Merge pull request #1262 from ozanarmagan/v0.25-join

Prevent using non-auto-embedding vector fields in query_by
This commit is contained in:
Kishore Nallan 2023-09-29 07:26:58 +05:30 committed by GitHub
commit e80eba7b69
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 0 deletions

View File

@ -1233,6 +1233,11 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
continue;
}
if(embedding_fields.find(search_field.name) == embedding_fields.end()) {
std::string error = "Vector field `" + search_field.name + "` is not an auto-embedding field, do not use `query_by` with it, use `vector_query` instead.";
return Option<nlohmann::json>(400, error);
}
TextEmbedderManager& embedder_manager = TextEmbedderManager::get_instance();
auto embedder_op = embedder_manager.get_text_embedder(search_field.embed[fields::model_config]);
if(!embedder_op.ok()) {

View File

@ -2177,4 +2177,42 @@ TEST_F(CollectionVectorTest, HybridSearchOnlyKeyworMatchDoNotHaveVectorDistance)
ASSERT_EQ(1, hybrid_results.get()["hits"].size());
ASSERT_EQ(0, hybrid_results.get()["hits"][0].count("vector_distance"));
}
// Checks that naming a vector field without auto-embedding configuration in
// `query_by` makes the search fail with an error that directs the caller to
// `vector_query` instead.
TEST_F(CollectionVectorTest, QueryByNotAutoEmbeddingVectorField) {
    // The asserted message must match the one produced by the search call exactly.
    const std::string expected_error =
        "Vector field `embedding` is not an auto-embedding field, do not use `query_by` with it, use `vector_query` instead.";

    // `embedding` is declared as a float[] with only `num_dim`; the schema
    // carries no embedding configuration for it.
    nlohmann::json schema = nlohmann::json::parse(R"({
"name": "test",
"fields": [
{
"name": "title",
"type": "string"
},
{
"name": "embedding",
"type": "float[]",
"num_dim": 384
}
]
})");

    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");

    auto create_op = collectionManager.create_collection(schema);
    ASSERT_TRUE(create_op.ok());
    auto collection = create_op.get();

    // `embedding` is listed among the query_by fields; a vector_query on the
    // same field is passed as well, and the query_by error is still expected.
    auto result = collection->search(
        "john", {"title", "embedding"}, "", {}, {}, {0}, 20, 1, FREQUENCY, {true},
        Index::DROP_TOKENS_THRESHOLD,
        spp::sparse_hash_set<std::string>(),
        spp::sparse_hash_set<std::string>(),
        10, "", 30, 5,
        "", 10, {}, {}, {}, 0,
        "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7,
        fallback,
        4, {off}, 32767, 32767, 2,
        false, true,
        "embedding:([0.96826, 0.94, 0.39557, 0.306488])");

    ASSERT_FALSE(result.ok());
    ASSERT_EQ(expected_error, result.error());
}