From 46b5847869d91c5ecbcaf737c37ae9a82bf04f9e Mon Sep 17 00:00:00 2001 From: ozanarmagan Date: Mon, 30 Oct 2023 09:33:12 +0300 Subject: [PATCH] Fix updating auto embedding field indexes --- src/index.cpp | 9 +-- test/collection_vector_search_test.cpp | 93 ++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 6 deletions(-) diff --git a/src/index.cpp b/src/index.cpp index f027c09e..f9d755d8 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -6722,13 +6722,10 @@ void Index::batch_embed_fields(std::vector& records, texts_to_embed[i].first->index_failure(embedding_res.status_code, ""); continue; } - nlohmann::json* document; if(texts_to_embed[i].first->is_update) { - document = &texts_to_embed[i].first->new_doc; - } else { - document = &texts_to_embed[i].first->doc; - } - (*document)[field.name] = embedding_res.embedding; + texts_to_embed[i].first->new_doc[field.name] = embedding_res.embedding; + } + texts_to_embed[i].first->doc[field.name] = embedding_res.embedding; } } } diff --git a/test/collection_vector_search_test.cpp b/test/collection_vector_search_test.cpp index 04cd0096..72244efe 100644 --- a/test/collection_vector_search_test.cpp +++ b/test/collection_vector_search_test.cpp @@ -2729,4 +2729,97 @@ TEST_F(CollectionVectorTest, TestSearchNonIndexedVectorField) { ASSERT_FALSE(search_result.ok()); ASSERT_EQ("Field `vec` is marked as a non-indexed field in the schema.", search_result.error()); +} + + +TEST_F(CollectionVectorTest, TestSemanticSearchAfterUpdate) { + nlohmann::json schema = R"({ + "name": "test", + "fields": [ + { + "name": "name", + "type": "string" + }, + { + "name": "embedding", + "type": "float[]", + "embed": { + "from": [ + "name" + ], + "model_config": { + "model_name": "ts/e5-small" + } + } + } + ] + })"_json; + + TextEmbedderManager::set_model_dir("/tmp/typesense_test/models"); + + auto collection_create_op = collectionManager.create_collection(schema); + ASSERT_TRUE(collection_create_op.ok()); + + auto coll = collection_create_op.get(); + + auto add_op = coll->add(R"({ + "name": "soccer", + "id": "0" + })"_json.dump()); + + ASSERT_TRUE(add_op.ok()); + + add_op = coll->add(R"({ + "name": "basketball", + "id": "1" + })"_json.dump()); + + ASSERT_TRUE(add_op.ok()); + + add_op = coll->add(R"({ + "name": "typesense", + "id": "2" + })"_json.dump()); + + ASSERT_TRUE(add_op.ok()); + + add_op = coll->add(R"({ + "name": "potato", + "id": "3" + })"_json.dump()); + + ASSERT_TRUE(add_op.ok()); + + auto result = coll->search("*", {}, "", {}, {}, {0}, 20, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10, {}, {}, {}, 0, + "", "", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, + fallback, + 4, {off}, 32767, 32767, 2, + false, true, "embedding:([], id:0, k:1)"); + + ASSERT_TRUE(result.ok()); + ASSERT_EQ(1, result.get()["hits"].size()); + ASSERT_EQ("basketball", result.get()["hits"][0]["document"]["name"]); + + auto update_op = coll->add(R"({ + "name": "onion", + "id": "0" + })"_json.dump(), index_operation_t::UPDATE, "0"); + + ASSERT_TRUE(update_op.ok()); + + result = coll->search("*", {}, "", {}, {}, {0}, 20, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10, {}, {}, {}, 0, + "", "", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, + fallback, + 4, {off}, 32767, 32767, 2, + false, true, "embedding:([], id:0, k:1)"); + + ASSERT_TRUE(result.ok()); + ASSERT_EQ(1, result.get()["hits"].size()); + ASSERT_EQ("potato", result.get()["hits"][0]["document"]["name"]); } \ No newline at end of file