From 46b5847869d91c5ecbcaf737c37ae9a82bf04f9e Mon Sep 17 00:00:00 2001
From: ozanarmagan <o.armagan2020@gtu.edu.tr>
Date: Mon, 30 Oct 2023 09:33:12 +0300
Subject: [PATCH] Fix updating auto embedding field indexes

---
 src/index.cpp                          |  9 +--
 test/collection_vector_search_test.cpp | 93 ++++++++++++++++++++++++++
 2 files changed, 96 insertions(+), 6 deletions(-)
diff --git a/src/index.cpp b/src/index.cpp
index f027c09e..f9d755d8 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -6722,13 +6722,10 @@ void Index::batch_embed_fields(std::vector<index_record*>& records,
                 texts_to_embed[i].first->index_failure(embedding_res.status_code, "");
                 continue;
             }
-            nlohmann::json* document;
             if(texts_to_embed[i].first->is_update) {
-                document = &texts_to_embed[i].first->new_doc;
-            } else {
-                document = &texts_to_embed[i].first->doc;
-            }
-            (*document)[field.name] = embedding_res.embedding;
+                texts_to_embed[i].first->new_doc[field.name] = embedding_res.embedding;
+            } 
+            texts_to_embed[i].first->doc[field.name] = embedding_res.embedding;
         }
     }
 }
diff --git a/test/collection_vector_search_test.cpp b/test/collection_vector_search_test.cpp
index 04cd0096..72244efe 100644
--- a/test/collection_vector_search_test.cpp
+++ b/test/collection_vector_search_test.cpp
@@ -2729,4 +2729,97 @@ TEST_F(CollectionVectorTest, TestSearchNonIndexedVectorField) {
     
     ASSERT_FALSE(search_result.ok());
     ASSERT_EQ("Field `vec` is marked as a non-indexed field in the schema.", search_result.error());
+}
+
+
+TEST_F(CollectionVectorTest, TestSemanticSearchAfterUpdate) { // Checks that the id:0 vector query returns a different top hit once doc "0" has been updated.
+    nlohmann::json schema = R"({
+                "name": "test",
+                "fields": [
+                    {
+                        "name": "name",
+                        "type": "string"
+                    },
+                    {
+                        "name": "embedding",
+                        "type": "float[]",
+                        "embed": {
+                            "from": [
+                                "name"
+                            ],
+                            "model_config": {
+                                "model_name": "ts/e5-small"
+                            }
+                        }
+                    }
+                ]
+                })"_json; // schema: "embedding" is a float[] field declared with embed.from = ["name"] and model "ts/e5-small"
+    
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models"); // hand the embedder manager a test-local model directory
+
+    auto collection_create_op = collectionManager.create_collection(schema); // create the "test" collection from the schema above
+    ASSERT_TRUE(collection_create_op.ok());
+
+    auto coll = collection_create_op.get(); // collection handle used for every add/search below
+
+    auto add_op = coll->add(R"({
+        "name": "soccer",
+        "id": "0"
+    })"_json.dump()); // doc "0" ("soccer"): referenced by `id:0` in the vector queries and updated later in this test
+
+    ASSERT_TRUE(add_op.ok());
+
+    add_op = coll->add(R"({
+        "name": "basketball",
+        "id": "1"
+    })"_json.dump()); // doc "1" ("basketball"): the hit expected before the update
+
+    ASSERT_TRUE(add_op.ok());
+
+    add_op = coll->add(R"({
+        "name": "typesense",
+        "id": "2"
+    })"_json.dump()); // doc "2" ("typesense"): not expected as a hit in either search
+
+    ASSERT_TRUE(add_op.ok());
+
+    add_op = coll->add(R"({
+        "name": "potato",
+        "id": "3"
+    })"_json.dump()); // doc "3" ("potato"): the hit expected after the update
+
+    ASSERT_TRUE(add_op.ok());
+
+    auto result = coll->search("*", {}, "", {}, {}, {0}, 20, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                                 spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
+                                 "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7,
+                                 fallback,
+                                 4, {off}, 32767, 32767, 2,
+                                 false, true, "embedding:([], id:0, k:1)"); // wildcard search; last argument is the vector query — NOTE(review): presumably "single nearest neighbour of doc 0", confirm against the vector query syntax
+    
+    ASSERT_TRUE(result.ok());
+    ASSERT_EQ(1, result.get()["hits"].size()); // exactly one hit is expected (the query asks for k:1)
+    ASSERT_EQ("basketball", result.get()["hits"][0]["document"]["name"]); // while doc "0" is still "soccer", the expected hit is "basketball"
+
+    auto update_op = coll->add(R"({
+        "name": "onion",
+        "id": "0"
+    })"_json.dump(), index_operation_t::UPDATE, "0"); // UPDATE operation on id "0": its name changes from "soccer" to "onion"
+
+    ASSERT_TRUE(update_op.ok());
+
+    result = coll->search("*", {}, "", {}, {}, {0}, 20, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                                 spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
+                                 "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7,
+                                 fallback,
+                                 4, {off}, 32767, 32767, 2,
+                                 false, true, "embedding:([], id:0, k:1)"); // same arguments as the first search, re-run after the update
+
+    ASSERT_TRUE(result.ok());
+    ASSERT_EQ(1, result.get()["hits"].size()); // still exactly one hit
+    ASSERT_EQ("potato", result.get()["hits"][0]["document"]["name"]);   // the hit must now be "potato"; getting "basketball" again would presumably mean the old "soccer" embedding is still in use
 }
\ No newline at end of file