From 0e3973632785141e05e202234109cc89b277a7e8 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Fri, 9 Sep 2022 14:14:21 +0530 Subject: [PATCH] Refactor / simplify schema update logic. --- include/collection.h | 9 +-- src/collection.cpp | 83 ++++++++++---------------- test/collection_schema_change_test.cpp | 4 +- 3 files changed, 38 insertions(+), 58 deletions(-) diff --git a/include/collection.h b/include/collection.h index f0bd2ff2..b0f09fcc 100644 --- a/include/collection.h +++ b/include/collection.h @@ -196,17 +196,14 @@ private: Option persist_collection_meta(); - Option batch_alter_data(const tsl::htrie_map& schema_additions, - const std::unordered_map& new_dynamic_fields, + Option batch_alter_data(const std::vector& alter_fields, const std::vector& del_fields, const std::string& this_fallback_field_type, const bool do_validation); Option validate_alter_payload(nlohmann::json& schema_changes, - tsl::htrie_map& schema_additions, - tsl::htrie_map& schema_reindex, - std::unordered_map& addition_dynamic_fields, - std::unordered_map& reindex_dynamic_fields, + std::vector& addition_fields, + std::vector& reindex_fields, std::vector& del_fields, std::string& fallback_field_type); diff --git a/src/collection.cpp b/src/collection.cpp index 0d47bece..d69923bb 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -3201,29 +3201,29 @@ Option Collection::persist_collection_meta() { return Option(true); } -Option Collection::batch_alter_data(const tsl::htrie_map& schema_additions, - const std::unordered_map& new_dynamic_fields, +Option Collection::batch_alter_data(const std::vector& alter_fields, const std::vector& del_fields, const std::string& this_fallback_field_type, const bool do_validation) { // Update schema with additions (deletions can only be made later) std::vector new_fields; - for(auto& f: schema_additions) { + tsl::htrie_map schema_additions; + + for(auto& f: alter_fields) { if(f.name == ".*") { fields.push_back(f); continue; } - search_schema.emplace(f.name, f); - new_fields.push_back(f); - fields.push_back(f); - } + if(f.is_dynamic()) { + dynamic_fields.emplace(f.name, f); + } else { + schema_additions.emplace(f.name, f); + search_schema.emplace(f.name, f); + new_fields.push_back(f); + } - for(auto& kv: new_dynamic_fields) { - // regexp fields and fields with auto type are treated as dynamic fields - const auto& f = kv.second; - dynamic_fields.emplace(f.name, f); fields.push_back(f); } @@ -3324,15 +3324,13 @@ Option Collection::alter(nlohmann::json& alter_payload) { std::unique_lock lock(mutex); // Validate that all stored documents are compatible with the proposed schema changes. - tsl::htrie_map schema_additions; - tsl::htrie_map schema_reindex; - std::unordered_map addition_dynamic_fields; - std::unordered_map reindex_dynamic_fields; std::vector del_fields; + std::vector addition_fields; + std::vector reindex_fields; + std::string this_fallback_field_type; - auto validate_op = validate_alter_payload(alter_payload, schema_additions, schema_reindex, - addition_dynamic_fields, reindex_dynamic_fields, + auto validate_op = validate_alter_payload(alter_payload, addition_fields, reindex_fields, del_fields, this_fallback_field_type); if(!validate_op.ok()) { return validate_op; @@ -3347,21 +3345,21 @@ Option Collection::alter(nlohmann::json& alter_payload) { } LOG(INFO) << "Alter payload validation is successful..."; - if(!schema_reindex.empty()) { + if(!reindex_fields.empty()) { LOG(INFO) << "Processing field additions and deletions first..."; } - auto batch_alter_op = batch_alter_data(schema_additions, addition_dynamic_fields, del_fields, fallback_field_type, false); + auto batch_alter_op = batch_alter_data(addition_fields, del_fields, fallback_field_type, false); if(!batch_alter_op.ok()) { return batch_alter_op; } - if(!schema_reindex.empty()) { + if(!reindex_fields.empty()) { LOG(INFO) << "Processing field modifications now..."; // we've to run revaliation because during schema change, some coercion might be needed // e.g. "123" -> 123 (string to integer) bool do_validation = true; - batch_alter_op = batch_alter_data(schema_reindex, reindex_dynamic_fields, {}, fallback_field_type, do_validation); + batch_alter_op = batch_alter_data(reindex_fields, {}, fallback_field_type, do_validation); if(!batch_alter_op.ok()) { return batch_alter_op; } @@ -3461,10 +3459,8 @@ void Collection::prune_doc(nlohmann::json& doc, } Option Collection::validate_alter_payload(nlohmann::json& schema_changes, - tsl::htrie_map& schema_additions, - tsl::htrie_map& schema_reindex, - std::unordered_map& addition_dynamic_fields, - std::unordered_map& reindex_dynamic_fields, + std::vector& addition_fields, + std::vector& reindex_fields, std::vector& del_fields, std::string& fallback_field_type) { if(!schema_changes.is_object()) { @@ -3525,8 +3521,7 @@ Option Collection::validate_alter_payload(nlohmann::json& schema_changes, auto found_field = (field_it != search_schema.end()); auto dyn_field_it = dynamic_fields.find(field_name); - auto found_dyn_field = (dyn_field_it != dynamic_fields.end()) || - (found_field && field_types::is_string_or_array(field_it.value().type)); + auto found_dyn_field = (dyn_field_it != dynamic_fields.end()); if(kv.value().contains("drop")) { if(!kv.value()["drop"].is_boolean() || !kv.value()["drop"].get()) { @@ -3545,16 +3540,10 @@ Option Collection::validate_alter_payload(nlohmann::json& schema_changes, if(found_field) { del_fields.push_back(field_it.value()); updated_search_schema.erase(field_it.key()); - - // we will also have to resolve the dynamic field names which match the static field name - for(auto& field_kv: dynamic_fields) { - if(std::regex_match(field_kv.first, std::regex(field_it.key()))) { - del_fields.push_back(field_kv.second); - } - } } - else if(found_dyn_field) { + // NOTE: fields with type "auto" or "string*" will exist in both `search_schema` and `dynamic_fields` + if(found_dyn_field) { del_fields.push_back(dyn_field_it->second); // we will also have to resolve the actual field names which match the dynamic field pattern for(auto& a_field: search_schema) { @@ -3587,21 +3576,15 @@ Option Collection::validate_alter_payload(nlohmann::json& schema_changes, const auto& f = diff_fields.back(); if(f.is_dynamic()) { - new_dynamic_fields.emplace(f.name, f); - - if(is_reindex) { - reindex_dynamic_fields.emplace(f.name, f); - } else { - addition_dynamic_fields.emplace(f.name, f); - } + new_dynamic_fields[f.name] = f; } else { updated_search_schema[f.name] = f; - if(is_reindex) { - // delete + reindex: we will handle these fields separately - schema_reindex.emplace(f.name, f); - } else { - schema_additions.emplace(f.name, f); - } + } + + if(is_reindex) { + reindex_fields.push_back(f); + } else { + addition_fields.push_back(f); } } else { // partial update is not supported for now @@ -3636,7 +3619,7 @@ Option Collection::validate_alter_payload(nlohmann::json& schema_changes, nlohmann::detail::error_handler_t::ignore)); } - if(!fallback_field_type.empty() || !addition_dynamic_fields.empty() || !reindex_dynamic_fields.empty()) { + if(!fallback_field_type.empty() || !new_dynamic_fields.empty()) { std::vector new_fields; Option new_fields_op = detect_new_fields(document, DIRTY_VALUES::DROP, updated_search_schema, new_dynamic_fields, @@ -3649,7 +3632,7 @@ Option Collection::validate_alter_payload(nlohmann::json& schema_changes, for(auto& new_field: new_fields) { updated_search_schema[new_field.name] = new_field; - schema_reindex[new_field.name] = new_field; + reindex_fields.push_back(new_field); } } diff --git a/test/collection_schema_change_test.cpp b/test/collection_schema_change_test.cpp index b0a9390f..fcc9905b 100644 --- a/test/collection_schema_change_test.cpp +++ b/test/collection_schema_change_test.cpp @@ -124,8 +124,8 @@ TEST_F(CollectionSchemaChangeTest, AddNewFieldsToCollection) { auto coll_fields = coll1->get_fields(); ASSERT_EQ(7, coll_fields.size()); - ASSERT_EQ("age", coll_fields[5].name); - ASSERT_EQ(".*_bool", coll_fields[6].name); + ASSERT_EQ(".*_bool", coll_fields[5].name); + ASSERT_EQ("age", coll_fields[6].name); doc["id"] = "1"; doc["title"] = "The one";