From 32336e4d9399f89de41235f06d35eeeef6852c3b Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Fri, 19 May 2023 21:27:35 +0530 Subject: [PATCH] Do coercion by default for collection loading / schema alter. --- src/collection.cpp | 2 +- src/collection_manager.cpp | 2 +- test/collection_manager_test.cpp | 49 +++++++++++++++ test/collection_schema_change_test.cpp | 82 ++++++++++++++++++++++---- 4 files changed, 123 insertions(+), 12 deletions(-) diff --git a/src/collection.cpp b/src/collection.cpp index 7d7d031b..b9c5a012 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -4227,7 +4227,7 @@ Option Collection::validate_alter_payload(nlohmann::json& schema_changes, index_operation_t::CREATE, false, fallback_field_type, - DIRTY_VALUES::REJECT); + DIRTY_VALUES::COERCE_OR_REJECT); if(!validate_op.ok()) { std::string err_message = validate_op.error(); diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp index 52b6ce15..4fe4702e 100644 --- a/src/collection_manager.cpp +++ b/src/collection_manager.cpp @@ -1325,7 +1325,7 @@ Option CollectionManager::load_collection(const nlohmann::json &collection field::flatten_doc(document, collection->get_nested_fields(), {}, true, flattened_fields); } - auto dirty_values = DIRTY_VALUES::DROP; + auto dirty_values = DIRTY_VALUES::COERCE_OR_DROP; num_valid_docs++; diff --git a/test/collection_manager_test.cpp b/test/collection_manager_test.cpp index 38c7e014..e821e51b 100644 --- a/test/collection_manager_test.cpp +++ b/test/collection_manager_test.cpp @@ -793,6 +793,55 @@ TEST_F(CollectionManagerTest, RestoreNestedDocsOnRestart) { collectionManager2.drop_collection("coll1"); } +TEST_F(CollectionManagerTest, RestoreCoercedDocValuesOnRestart) { + nlohmann::json schema = R"({ + "name": "coll1", + "enable_nested_fields": true, + "fields": [ + {"name": "product", "type": "object" }, + {"name": "product.price", "type": "int64" } + ] + })"_json; + + auto op = collectionManager.create_collection(schema); + ASSERT_TRUE(op.ok()); + Collection* coll1 = op.get(); + + auto doc1 = R"({ + "product": {"price": 45.78} + })"_json; + + auto create_op = coll1->add(doc1.dump(), CREATE); + ASSERT_TRUE(create_op.ok()); + + auto res_op = coll1->search("*", {}, "product.price:>0", {}, {}, {0}, 10, 1, + token_ordering::FREQUENCY, {true}); + ASSERT_TRUE(res_op.ok()); + ASSERT_EQ(1, res_op.get()["found"].get()); + + // create a new collection manager to ensure that it restores the records from the disk backed store + CollectionManager& collectionManager2 = CollectionManager::get_instance(); + collectionManager2.init(store, 1.0, "auth_key", quit); + auto load_op = collectionManager2.load(8, 1000); + + if(!load_op.ok()) { + LOG(ERROR) << load_op.error(); + } + + ASSERT_TRUE(load_op.ok()); + + auto restored_coll = collectionManager2.get_collection("coll1").get(); + ASSERT_NE(nullptr, restored_coll); + + res_op = restored_coll->search("*", {}, "product.price:>0", {}, {}, {0}, 10, 1, + token_ordering::FREQUENCY, {true}); + ASSERT_TRUE(res_op.ok()); + ASSERT_EQ(1, res_op.get()["found"].get()); + + collectionManager.drop_collection("coll1"); + collectionManager2.drop_collection("coll1"); +} + TEST_F(CollectionManagerTest, DropCollectionCleanly) { std::ifstream infile(std::string(ROOT_DIR)+"test/multi_field_documents.jsonl"); std::string json_line; diff --git a/test/collection_schema_change_test.cpp b/test/collection_schema_change_test.cpp index 88d8e940..48a64c3a 100644 --- a/test/collection_schema_change_test.cpp +++ b/test/collection_schema_change_test.cpp @@ -542,7 +542,7 @@ TEST_F(CollectionSchemaChangeTest, AbilityToDropAndReAddIndexAtTheSameTime) { nlohmann::json doc; doc["id"] = "0"; - doc["title"] = "123"; + doc["title"] = "Hello"; doc["timestamp"] = 3433232; ASSERT_TRUE(coll1->add(doc.dump()).ok()); @@ -562,7 +562,7 @@ TEST_F(CollectionSchemaChangeTest, AbilityToDropAndReAddIndexAtTheSameTime) { "Existing data for field `title` cannot be coerced into an int32.", alter_op.error()); // existing data should not have been touched - auto res = coll1->search("12", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, 10).get(); + auto res = coll1->search("he", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, 10).get(); ASSERT_EQ(1, res["hits"].size()); ASSERT_EQ("0", res["hits"][0]["document"]["id"].get()); @@ -586,7 +586,7 @@ TEST_F(CollectionSchemaChangeTest, AbilityToDropAndReAddIndexAtTheSameTime) { ASSERT_EQ(4, res["facet_counts"][0].size()); ASSERT_EQ("title", res["facet_counts"][0]["field_name"]); ASSERT_EQ(1, res["facet_counts"][0]["counts"].size()); - ASSERT_EQ("123", res["facet_counts"][0]["counts"][0]["value"].get()); + ASSERT_EQ("Hello", res["facet_counts"][0]["counts"][0]["value"].get()); // migrate int32 to int64 schema_changes = R"({ @@ -801,7 +801,7 @@ TEST_F(CollectionSchemaChangeTest, DropFieldNotExistingInDocuments) { ASSERT_TRUE(alter_op.ok()); } -TEST_F(CollectionSchemaChangeTest, ChangeFieldToCoercableTypeIsNotAllowed) { +TEST_F(CollectionSchemaChangeTest, ChangeFieldToCoercableTypeIsAllowed) { // optional title field std::vector fields = {field("title", field_types::STRING, false, true, true, "", 1, 1), field("points", field_types::INT32, true),}; @@ -823,9 +823,7 @@ TEST_F(CollectionSchemaChangeTest, ChangeFieldToCoercableTypeIsNotAllowed) { })"_json; auto alter_op = coll1->alter(schema_changes); - ASSERT_FALSE(alter_op.ok()); - ASSERT_EQ("Schema change is incompatible with the type of documents already stored in this collection. " - "Existing data for field `points` cannot be coerced into a string.", alter_op.error()); + ASSERT_TRUE(alter_op.ok()); } TEST_F(CollectionSchemaChangeTest, ChangeFromPrimitiveToDynamicField) { @@ -1140,7 +1138,7 @@ TEST_F(CollectionSchemaChangeTest, DropIntegerFieldAndAddStringValues) { nlohmann::json doc; doc["id"] = "0"; - doc["label"] = 1000; + doc["label"] = "hello"; doc["title"] = "Foo"; auto add_op = coll1->add(doc.dump()); ASSERT_TRUE(add_op.ok()); @@ -1157,7 +1155,7 @@ TEST_F(CollectionSchemaChangeTest, DropIntegerFieldAndAddStringValues) { // add new document with a string label doc["id"] = "1"; - doc["label"] = "abcdef"; + doc["label"] = 1000; doc["title"] = "Bar"; add_op = coll1->add(doc.dump()); ASSERT_TRUE(add_op.ok()); @@ -1173,7 +1171,7 @@ TEST_F(CollectionSchemaChangeTest, DropIntegerFieldAndAddStringValues) { alter_op = coll1->alter(schema_changes); ASSERT_FALSE(alter_op.ok()); ASSERT_EQ("Schema change is incompatible with the type of documents already stored in this collection. " - "Existing data for field `label` cannot be coerced into a string.", alter_op.error()); + "Existing data for field `label` cannot be coerced into an int64.", alter_op.error()); // but should allow the problematic field to be dropped schema_changes = R"({ @@ -1411,6 +1409,70 @@ TEST_F(CollectionSchemaChangeTest, DropAndReAddNestedObject) { ASSERT_EQ(4, schema_map.size()); } +TEST_F(CollectionSchemaChangeTest, UpdateAfterNestedNullValue) { + nlohmann::json schema = R"({ + "name": "coll1", + "enable_nested_fields": true, + "fields": [ + {"name": "lines", "optional": false, "type": "object[]"}, + {"name": "lines.name", "optional": true, "type": "string[]"} + ] + })"_json; + + Collection* coll1 = collectionManager.create_collection(schema).get(); + + nlohmann::json doc = R"( + {"id": "1", "lines": [{"name": null}]} + )"_json; + + auto add_op = coll1->add(doc.dump(), CREATE, "1", DIRTY_VALUES::DROP); + ASSERT_TRUE(add_op.ok()); + + // add new field + + auto schema_changes = R"({ + "fields": [ + {"name": "title", "type": "string", "optional": true} + ] + })"_json; + + auto alter_op = coll1->alter(schema_changes); + ASSERT_TRUE(alter_op.ok()); +} + +TEST_F(CollectionSchemaChangeTest, AlterShouldBeAbleToHandleFieldValueCoercion) { + nlohmann::json schema = R"({ + "name": "coll1", + "enable_nested_fields": true, + "fields": [ + {"name": "product", "optional": false, "type": "object"}, + {"name": "product.price", "type": "int64"}, + {"name": "title", "type": "string"}, + {"name": "description", "type": "string"} + ] + })"_json; + + Collection* coll1 = collectionManager.create_collection(schema).get(); + + nlohmann::json doc = R"( + {"id": "0", "product": {"price": 56.45}, "title": "Title 1", "description": "Description 1"} + )"_json; + + auto add_op = coll1->add(doc.dump(), CREATE, "0", DIRTY_VALUES::COERCE_OR_REJECT); + ASSERT_TRUE(add_op.ok()); + + // drop a field + + auto schema_changes = R"({ + "fields": [ + {"name": "description", "drop": true} + ] + })"_json; + + auto alter_op = coll1->alter(schema_changes); + ASSERT_TRUE(alter_op.ok()); +} + TEST_F(CollectionSchemaChangeTest, GeoFieldSchemaAddition) { nlohmann::json schema = R"({ "name": "coll1",