Merge pull request #1234 from ozanarmagan/v0.25-join

Fix extracting field names for two embedding fields with the same prefix
This commit is contained in:
Kishore Nallan 2023-09-20 16:56:39 +05:30 committed by GitHub
commit c798966a50
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 62 additions and 0 deletions

View File

@ -1044,6 +1044,10 @@ Option<bool> Collection::extract_field_name(const std::string& field_name,
continue;
}
if(!exact_key_match && text_embedding) {
continue;
}
if (exact_primitive_match || is_wildcard || text_embedding ||
// field_name prefix must be followed by a "." to indicate an object search
(enable_nested_fields && kv.key().size() > field_name.size() && kv.key()[field_name.size()] == '.')) {

View File

@ -2018,5 +2018,63 @@ TEST_F(CollectionVectorTest, TestMultilingualE5) {
1, FREQUENCY, {true},
0, spp::sparse_hash_set<std::string>());
ASSERT_TRUE(semantic_results.ok());
}
// Regression test for field-name extraction when the name of one auto-embedding
// field ("embedding") is a strict prefix of another ("embedding_en").
// Both fields embed from "title" with the same model; each must be searchable
// on its own by its exact name.
TEST_F(CollectionVectorTest, TestTwoEmbeddingFieldsSamePrefix) {
    nlohmann::json schema = R"({
"name": "docs",
"fields": [
{
"name": "title",
"type": "string"
},
{
"name": "embedding",
"type": "float[]",
"embed": {
"from": [
"title"
],
"model_config": {
"model_name": "ts/e5-small"
}
}
},
{
"name": "embedding_en",
"type": "float[]",
"embed": {
"from": [
"title"
],
"model_config": {
"model_name": "ts/e5-small"
}
}
}
]
})"_json;

    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");

    auto collection_create_op = collectionManager.create_collection(schema);
    ASSERT_TRUE(collection_create_op.ok());
    auto coll1 = collection_create_op.get();

    // A single document is enough: the test is about resolving the queried
    // field name, not about ranking.
    auto add_op = coll1->add(R"({
"title": "john doe"
})"_json.dump());
    ASSERT_TRUE(add_op.ok());

    // Query the shorter name, which is a prefix of the other field's name.
    auto semantic_results = coll1->search("john", {"embedding"},
                                          "", {}, {}, {2}, 10,
                                          1, FREQUENCY, {true},
                                          0, spp::sparse_hash_set<std::string>());
    ASSERT_TRUE(semantic_results.ok());
    // NOTE(review): assumes the search response exposes matches under "hits",
    // as in the other tests of this suite -- confirm.
    ASSERT_EQ(1, semantic_results.get()["hits"].size());

    // Query the longer name as well, so both sides of the shared prefix are
    // covered by the regression test.
    auto semantic_results_en = coll1->search("john", {"embedding_en"},
                                             "", {}, {}, {2}, 10,
                                             1, FREQUENCY, {true},
                                             0, spp::sparse_hash_set<std::string>());
    ASSERT_TRUE(semantic_results_en.ok());
    ASSERT_EQ(1, semantic_results_en.get()["hits"].size());
}