Always do validations during schema alter reindexing.

This commit is contained in:
Kishore Nallan 2022-09-11 16:29:48 +05:30
parent 36f38c31c1
commit f9f7afff52
3 changed files with 70 additions and 5 deletions

View File

@ -3349,17 +3349,14 @@ Option<bool> Collection::alter(nlohmann::json& alter_payload) {
LOG(INFO) << "Processing field additions and deletions first...";
}
auto batch_alter_op = batch_alter_data(addition_fields, del_fields, fallback_field_type, false);
auto batch_alter_op = batch_alter_data(addition_fields, del_fields, fallback_field_type, true);
if(!batch_alter_op.ok()) {
return batch_alter_op;
}
if(!reindex_fields.empty()) {
LOG(INFO) << "Processing field modifications now...";
// we have to run revalidation because, during a schema change, some coercion might be needed
// e.g. "123" -> 123 (string to integer)
bool do_validation = true;
batch_alter_op = batch_alter_data(reindex_fields, {}, fallback_field_type, do_validation);
batch_alter_op = batch_alter_data(reindex_fields, {}, fallback_field_type, true);
if(!batch_alter_op.ok()) {
return batch_alter_op;
}

View File

@ -1090,6 +1090,42 @@ TEST_F(CollectionSchemaChangeTest, IndexFalseToTrue) {
ASSERT_EQ(1, res_op.get()["facet_counts"].size());
}
TEST_F(CollectionSchemaChangeTest, AddingFieldWithExistingNullValue) {
    // Scenario: a document is stored with `num` set to `null` while the schema
    // only declares `title`. After `num` is added to the schema via alter, a
    // later upsert of that same document must still succeed.
    nlohmann::json initial_schema = R"({
"name": "coll1",
"fields": [
{"name": "title", "type": "string"}
]
})"_json;

    Collection* coll1 = collectionManager.create_collection(initial_schema).get();

    // Seed document: `num` is explicitly null at indexing time.
    nlohmann::json null_num_doc;
    null_num_doc["id"] = "0";
    null_num_doc["title"] = "Sample Title 1";
    null_num_doc["num"] = nullptr;

    auto initial_add_op = coll1->add(null_num_doc.dump());
    ASSERT_TRUE(initial_add_op.ok());

    // Alter the schema so that `num` becomes an optional int32 field.
    auto num_field_addition = R"({
"fields": [
{"name": "num", "type": "int32", "optional": true}
]
})"_json;

    auto alter_op = coll1->alter(num_field_addition);
    ASSERT_TRUE(alter_op.ok());

    // Upsert the same document id, this time with a concrete integer value.
    nlohmann::json updated_doc;
    updated_doc["id"] = "0";
    updated_doc["title"] = "Sample Title 1";
    updated_doc["num"] = 100;

    auto upsert_op = coll1->add(updated_doc.dump(), UPSERT);
    ASSERT_TRUE(upsert_op.ok());

    // The updated value must be reachable through a filter on `num`.
    auto search_op = coll1->search("*", {}, "num:100", {}, {}, {2}, 10, 1, FREQUENCY, {true});
    auto res = search_op.get();
    ASSERT_EQ(1, res["hits"].size());
}
TEST_F(CollectionSchemaChangeTest, DropIntegerFieldAndAddStringValues) {
nlohmann::json schema = R"({
"name": "coll1",

View File

@ -1307,3 +1307,35 @@ TEST_F(CollectionSpecificMoreTest, WildcardSearchWithNoSortingField) {
ASSERT_EQ("1", res["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("0", res["hits"][1]["document"]["id"].get<std::string>());
}
TEST_F(CollectionSpecificMoreTest, AutoSchemaWithObjectValueAsFirstDoc) {
    // Scenario: with a wildcard `auto` schema, the first document carries an
    // empty object in `num` and a later document carries an integer there.
    // Updating the first document with an integer afterwards must not fail.
    nlohmann::json auto_schema = R"({
"name": "coll1",
"fields": [
{"name": ".*", "type": "auto"}
]
})"_json;

    Collection* coll1 = collectionManager.create_collection(auto_schema).get();

    // First document: `num` is an (empty) JSON object.
    nlohmann::json object_num_doc;
    object_num_doc["id"] = "0";
    object_num_doc["title"] = "Sample Title 1";
    object_num_doc["num"] = nlohmann::json::object();

    auto first_add_op = coll1->add(object_num_doc.dump());
    ASSERT_TRUE(first_add_op.ok());

    // Second document: `num` is an integer.
    nlohmann::json integer_num_doc;
    integer_num_doc["id"] = "1";
    integer_num_doc["title"] = "Sample Title 2";
    integer_num_doc["num"] = 42;

    auto second_add_op = coll1->add(integer_num_doc.dump());
    ASSERT_TRUE(second_add_op.ok());

    // Upsert the first document id; it keeps the second document's title but
    // now carries an integer `num`.
    nlohmann::json first_doc_update;
    first_doc_update["id"] = "0";
    first_doc_update["title"] = "Sample Title 1";
    first_doc_update["num"] = 100;

    auto upsert_op = coll1->add(first_doc_update.dump(), UPSERT);
    ASSERT_TRUE(upsert_op.ok());

    // The upserted value must be reachable through a filter on `num`.
    auto search_op = coll1->search("*", {}, "num:100", {}, {}, {2}, 10, 1, FREQUENCY, {true});
    auto res = search_op.get();
    ASSERT_EQ(1, res["hits"].size());
}