Merge pull request #1346 from ozanarmagan/v0.25-join

Fix updating auto embedding field indexes
2025-05-22 06:40:30 +08:00 · 2023-10-30 17:16:00 +05:30 · 2023-10-30 17:16:00 +05:30 · 813352ec48
commit 813352ec48
parent 9d0eaaad21 46b5847869
2 changed files with 96 additions and 6 deletions
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -6741,13 +6741,10 @@ void Index::batch_embed_fields(std::vector<index_record*>& records,
                texts_to_embed[i].first->index_failure(embedding_res.status_code, "");
                continue;
            }
-            nlohmann::json* document;
            if(texts_to_embed[i].first->is_update) {
-                document = &texts_to_embed[i].first->new_doc;
-            } else {
-                document = &texts_to_embed[i].first->doc;
-            }
-            (*document)[field.name] = embedding_res.embedding;
+                texts_to_embed[i].first->new_doc[field.name] = embedding_res.embedding;
+            } 
+            texts_to_embed[i].first->doc[field.name] = embedding_res.embedding;
        }
    }
 }
--- a/test/collection_vector_search_test.cpp
+++ b/test/collection_vector_search_test.cpp
@@ -2729,4 +2729,97 @@ TEST_F(CollectionVectorTest, TestSearchNonIndexedVectorField) {
    
    ASSERT_FALSE(search_result.ok());
    ASSERT_EQ("Field `vec` is marked as a non-indexed field in the schema.", search_result.error());
+}
+
+// Checks that a vector search on an auto-embedded field (`embedding`, generated from `name`) reflects an UPDATE of a document.
+TEST_F(CollectionVectorTest, TestSemanticSearchAfterUpdate) {
+    nlohmann::json schema = R"({
+                "name": "test",
+                "fields": [
+                    {
+                        "name": "name",
+                        "type": "string"
+                    },
+                    {
+                        "name": "embedding",
+                        "type": "float[]",
+                        "embed": {
+                            "from": [
+                                "name"
+                            ],
+                            "model_config": {
+                                "model_name": "ts/e5-small"
+                            }
+                        }
+                    }
+                ]
+                })"_json;
+    
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto collection_create_op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(collection_create_op.ok());
+
+    auto coll = collection_create_op.get();
+
+    auto add_op = coll->add(R"({
+        "name": "soccer",
+        "id": "0"
+    })"_json.dump());
+
+    ASSERT_TRUE(add_op.ok());
+
+    add_op = coll->add(R"({
+        "name": "basketball",
+        "id": "1"
+    })"_json.dump());
+
+    ASSERT_TRUE(add_op.ok());
+
+    add_op = coll->add(R"({
+        "name": "typesense",
+        "id": "2"
+    })"_json.dump());
+
+    ASSERT_TRUE(add_op.ok());
+
+    add_op = coll->add(R"({
+        "name": "potato",
+        "id": "3"
+    })"_json.dump());
+
+    ASSERT_TRUE(add_op.ok());
+    // Vector query `embedding:([], id:0, k:1)` - presumably the nearest neighbour of document "0" ("soccer"); TODO confirm.
+    auto result = coll->search("*", {}, "", {}, {}, {0}, 20, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                                 spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
+                                 "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7,
+                                 fallback,
+                                 4, {off}, 32767, 32767, 2,
+                                 false, true, "embedding:([], id:0, k:1)");
+    
+    ASSERT_TRUE(result.ok());
+    ASSERT_EQ(1, result.get()["hits"].size());
+    ASSERT_EQ("basketball", result.get()["hits"][0]["document"]["name"]);
+    // Change the name of document "0" from "soccer" to "onion" through an UPDATE operation.
+    auto update_op = coll->add(R"({
+        "name": "onion",
+        "id": "0"
+    })"_json.dump(), index_operation_t::UPDATE, "0");
+
+    ASSERT_TRUE(update_op.ok());
+    // Repeat the same vector query: after the update the single hit is expected to be "potato" instead.
+    result = coll->search("*", {}, "", {}, {}, {0}, 20, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                                 spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
+                                 "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7,
+                                 fallback,
+                                 4, {off}, 32767, 32767, 2,
+                                 false, true, "embedding:([], id:0, k:1)");
+
+    ASSERT_TRUE(result.ok());
+    ASSERT_EQ(1, result.get()["hits"].size());
+    ASSERT_EQ("potato", result.get()["hits"][0]["document"]["name"]);   
 }