Ensure that null-valued nested keys are not persisted.

This commit is contained in:
Kishore Nallan 2024-04-08 17:22:45 +05:30
parent ab9936f96b
commit 55ee87dfb2
2 changed files with 59 additions and 0 deletions

View File

@ -492,6 +492,13 @@ void Index::validate_and_preprocess(Index *index,
get_doc_changes(index_rec.operation, embedding_fields, index_rec.doc, index_rec.old_doc,
index_rec.new_doc, index_rec.del_doc);
/*if(index_rec.seq_id == 0) {
LOG(INFO) << "index_rec.doc: " << index_rec.doc;
LOG(INFO) << "index_rec.old_doc: " << index_rec.old_doc;
LOG(INFO) << "index_rec.new_doc: " << index_rec.new_doc;
LOG(INFO) << "index_rec.del_doc: " << index_rec.del_doc;
}*/
if(generate_embeddings) {
for(auto& field: index_rec.doc.items()) {
for(auto& embedding_field : embedding_fields) {
@ -7125,6 +7132,8 @@ void Index::get_doc_changes(const index_operation_t op, const tsl::htrie_map<cha
if(op == UPSERT) {
new_doc = update_doc;
new_doc.merge_patch(update_doc); // ensures that null valued keys are deleted
// since UPSERT could replace a doc with lesser fields, we have to add those missing fields to del_doc
for(auto it = old_doc.begin(); it != old_doc.end(); ++it) {
if(it.value().is_object() || (it.value().is_array() && (it.value().empty() || it.value()[0].is_object()))) {

View File

@ -2835,6 +2835,56 @@ TEST_F(CollectionNestedFieldsTest, EmplaceWithNullValueOnOptionalField) {
ASSERT_EQ(0, results["hits"][0]["document"]["currency"].size());
}
// Upserting a document in which a nested key is explicitly `null` must leave the parent object
// present but empty in the stored document, and the key's previous value must stop matching.
TEST_F(CollectionNestedFieldsTest, UpsertWithNullValueOnOptionalField) {
    nlohmann::json schema = R"({
"name": "coll1",
"enable_nested_fields": true,
"fields": [
{"name": "status", "type": "object"},
{"name": "title", "type": "string"}
]
})"_json;

    auto create_op = collectionManager.create_collection(schema);
    ASSERT_TRUE(create_op.ok());
    Collection* coll = create_op.get();

    // Every query below differs only in the query text and the single field being searched.
    auto search_field = [coll](const char* query, const char* field) {
        return coll->search(query, {field}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
    };

    // Step 1: index the document with a populated nested `status.name`.
    auto populated_doc = R"({
"id": "0",
"title": "Title Alpha",
"status": {"name": "Foo"}
})"_json;

    auto first_upsert = coll->add(populated_doc.dump(), UPSERT);
    ASSERT_TRUE(first_upsert.ok());

    auto title_hits = search_field("alpha", "title");
    ASSERT_EQ(1, title_hits["found"].get<size_t>());
    ASSERT_EQ(3, title_hits["hits"][0]["document"].size());  // id, title, status
    ASSERT_EQ(1, title_hits["hits"][0]["document"]["status"].size());

    auto status_hits = search_field("foo", "status");
    ASSERT_EQ(1, status_hits["found"].get<size_t>());

    // Step 2: upsert the same id again, this time with the nested key set to null.
    auto nulled_doc = R"({
"id": "0",
"title": "Title Alpha",
"status": {"name": null}
})"_json;

    auto second_upsert = coll->add(nulled_doc.dump(), UPSERT);
    ASSERT_TRUE(second_upsert.ok());

    // The document is still found by title, but `status` is now an empty object.
    auto title_hits_after = search_field("alpha", "title");
    ASSERT_EQ(1, title_hits_after["found"].get<size_t>());
    ASSERT_EQ(3, title_hits_after["hits"][0]["document"].size());  // id, title, status
    ASSERT_EQ(0, title_hits_after["hits"][0]["document"]["status"].size());

    // The old nested value must no longer match.
    auto status_hits_after = search_field("foo", "status");
    ASSERT_EQ(0, status_hits_after["found"].get<size_t>());
}
TEST_F(CollectionNestedFieldsTest, EmplaceWithMissingArrayValueOnOptionalField) {
nlohmann::json schema = R"({
"name": "coll1",