From 03358bcf3713f7365141af88ea1a4748408616a0 Mon Sep 17 00:00:00 2001
From: Kishore Nallan
Date: Mon, 17 Jun 2024 22:05:50 +0530
Subject: [PATCH] Fix null values in embed from field values.

---
 src/validator.cpp                      |  5 +++
 test/collection_vector_search_test.cpp | 44 ++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/src/validator.cpp b/src/validator.cpp
index 55999e41..6e33725a 100644
--- a/src/validator.cpp
+++ b/src/validator.cpp
@@ -726,6 +726,11 @@ Option<bool> validator_t::validate_embed_fields(const nlohmann::json& document,
                     continue;
                 }
             }
+
+            if(doc_field_it.value().is_null()) {
+                continue;
+            }
+
             all_optional_and_null = false;
             if((schema_field_it.value().type == field_types::STRING && !doc_field_it.value().is_string()) ||
                 (schema_field_it.value().type == field_types::STRING_ARRAY && !doc_field_it.value().is_array())) {
diff --git a/test/collection_vector_search_test.cpp b/test/collection_vector_search_test.cpp
index 7b0ef8ba..c5ae9704 100644
--- a/test/collection_vector_search_test.cpp
+++ b/test/collection_vector_search_test.cpp
@@ -1228,6 +1228,50 @@ TEST_F(CollectionVectorTest, EmbeddedVectorUnchangedUpsert) {
     ASSERT_EQ(384, embedding.size());
 }
 
+TEST_F(CollectionVectorTest, EmbeddOptionalFieldNullValueUpsert) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "fields": [
+            {"name": "title", "type": "string"},
+            {"name": "desc", "type": "string", "optional": true},
+            {"name": "tags", "type": "string[]", "optional": true},
+            {"name": "embedding", "type":"float[]", "embed":{"from": ["title", "desc", "tags"],
+                "model_config": {"model_name": "ts/e5-small"}}}
+        ]
+    })"_json;
+
+    EmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    nlohmann::json doc;
+    doc["id"] = "0";
+    doc["title"] = "Title";
+    doc["desc"] = nullptr;
+    doc["tags"] = {"foo", "bar"};
+
+    auto add_op = coll1->add(doc.dump(), UPSERT);
+    ASSERT_TRUE(add_op.ok());
+
+    auto results = coll1->search("title", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                                 spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>()).get();
+
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    auto embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
+    ASSERT_EQ(384, embedding.size());
+
+    // upsert doc
+    add_op = coll1->add(doc.dump(), index_operation_t::UPSERT);
+    ASSERT_TRUE(add_op.ok());
+
+    // try with null values in array: not allowed
+    doc["tags"] = {"bar", nullptr};
+    add_op = coll1->add(doc.dump(), index_operation_t::UPSERT);
+    ASSERT_FALSE(add_op.ok());
+    ASSERT_EQ("Field `tags` must be an array of string.", add_op.error());
+}
+
 TEST_F(CollectionVectorTest, HybridSearchWithExplicitVector) {
     nlohmann::json schema = R"({
         "name": "objects",