diff --git a/src/collection.cpp b/src/collection.cpp index e9de28a5..68338a77 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -2242,6 +2242,7 @@ Option Collection::check_and_update_schema(nlohmann::json& document, const if(std::regex_match (kv.key(), std::regex(dynamic_field.name))) { new_field = dynamic_field; new_field.name = fname; + new_field.type = dynamic_field.type; found_dynamic_field = true; break; } @@ -2253,34 +2254,37 @@ Option Collection::check_and_update_schema(nlohmann::json& document, const continue; } - // detect the actual type - if(fallback_field_type.empty() || fallback_field_type == field_types::AUTO || - field_types::is_string_or_array(fallback_field_type)) { + if(!found_dynamic_field || + new_field.type == field_types::AUTO || field_types::is_string_or_array(new_field.type)) { - parseable = field::get_type(kv.value(), field_type); - if(!parseable) { - if(dirty_values == DIRTY_VALUES::REJECT || dirty_values == DIRTY_VALUES::COERCE_OR_REJECT) { - return Option(400, "Type of field `" + kv.key() + "` is invalid."); - } else { - // DROP or COERCE_OR_DROP - kv = document.erase(kv); - continue; + // detect the actual type: without a dynamic field match, a fallback type that is neither auto nor string-or-array (per is_string_or_array) is used as-is; otherwise the type is inferred from the value + if(!found_dynamic_field && fallback_field_type != field_types::AUTO && + !field_types::is_string_or_array(fallback_field_type)) { + new_field.type = fallback_field_type; + } else { + parseable = field::get_type(kv.value(), field_type); + if(!parseable) { + if(dirty_values == DIRTY_VALUES::REJECT || dirty_values == DIRTY_VALUES::COERCE_OR_REJECT) { + return Option(400, "Type of field `" + kv.key() + "` is invalid."); + } else { + // DROP or COERCE_OR_DROP + kv = document.erase(kv); + continue; + } + } + + new_field.type = field_type; + + if (field_types::is_string_or_array(fallback_field_type)) { + // Supporting single/array field detection only for strings, + // as it does not seem to be too useful for other field types. 
+ if (new_field.is_array()) { + new_field.type = field_types::STRING_ARRAY; + } else { + new_field.type = field_types::STRING; + } } } - - new_field.type = field_type; - - if (field_types::is_string_or_array(fallback_field_type)) { - // Supporting single/array field detection only for strings, as it does not seem to be too useful for - // other field types. - if (new_field.is_array()) { - new_field.type = field_types::STRING_ARRAY; - } else { - new_field.type = field_types::STRING; - } - } - } else { - new_field.type = fallback_field_type; } if(!new_field.index) { diff --git a/test/collection_all_fields_test.cpp b/test/collection_all_fields_test.cpp index 34f33ab2..c4374760 100644 --- a/test/collection_all_fields_test.cpp +++ b/test/collection_all_fields_test.cpp @@ -821,6 +821,47 @@ TEST_F(CollectionAllFieldsTest, WildcardFacetFieldsWithoutAutoSchema) { collectionManager.drop_collection("coll1"); } +TEST_F(CollectionAllFieldsTest, RegexpExplicitFieldTypeCoercion) { + Collection *coll1; + + std::vector fields = {field("title", field_types::STRING, true), + field("i.*", field_types::INT32, false, true), + field("s.*", field_types::STRING, false, true), + field("a.*", field_types::STRING_ARRAY, false, true),}; + + coll1 = collectionManager.get_collection("coll1").get(); + if (coll1 == nullptr) { + coll1 = collectionManager.create_collection("coll1", 1, fields, "", 0).get(); + } + + nlohmann::json doc; + doc["title"] = "Rand Building"; + doc["i_age"] = "28"; + doc["s_name"] = nullptr; + doc["a_name"] = {}; + + // values should be coerced while each new field keeps the type declared by its matching regexp field + + auto add_op = coll1->add(doc.dump(), CREATE); + ASSERT_TRUE(add_op.ok()); + + auto schema = coll1->get_fields(); + + ASSERT_EQ("a_name", schema[4].name); + ASSERT_EQ(field_types::STRING_ARRAY, schema[4].type); + + ASSERT_EQ("i_age", schema[5].name); + ASSERT_EQ(field_types::INT32, schema[5].type); + + ASSERT_EQ("s_name", schema[6].name); + ASSERT_EQ(field_types::STRING, schema[6].type); + + auto results = 
coll1->search("rand", {"title"}, "i_age: 28", {}, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); + ASSERT_EQ(1, results["hits"].size()); + + collectionManager.drop_collection("coll1"); +} + TEST_F(CollectionAllFieldsTest, DynamicFieldsMustOnlyBeOptional) { Collection *coll1; @@ -890,6 +931,11 @@ TEST_F(CollectionAllFieldsTest, BothFallbackAndDynamicFields) { auto add_op = coll1->add(doc.dump(), CREATE); ASSERT_TRUE(add_op.ok()); + // org_year should be of type int32 + auto schema = coll1->get_fields(); + ASSERT_EQ("org_year", schema[5].name); + ASSERT_EQ(field_types::INT32, schema[5].type); + auto res_op = coll1->search("Amazon", {"org_name"}, "", {"org_name"}, sort_fields, {0}, 10, 1, FREQUENCY, {false}); ASSERT_FALSE(res_op.ok()); ASSERT_EQ("Could not find a facet field named `org_name` in the schema.", res_op.error()); @@ -906,6 +952,39 @@ TEST_F(CollectionAllFieldsTest, BothFallbackAndDynamicFields) { collectionManager.drop_collection("coll1"); } +TEST_F(CollectionAllFieldsTest, RegexpIntFieldWithFallbackStringType) { + Collection *coll1; + + std::vector fields = {field("title", field_types::STRING, true), + field("n.*", field_types::INT32, false, true), + field(".*", field_types::STRING, false, true)}; + + coll1 = collectionManager.get_collection("coll1").get(); + if (coll1 == nullptr) { + auto op = collectionManager.create_collection("coll1", 1, fields, "", 0, field_types::STRING); + ASSERT_TRUE(op.ok()); + coll1 = op.get(); + } + + nlohmann::json doc; + doc["title"] = "Amazon Inc."; + doc["n_age"] = 32; + doc["rand_str"] = "fizzbuzz"; + + auto add_op = coll1->add(doc.dump(), CREATE); + ASSERT_TRUE(add_op.ok()); + + // n_age should be of type int32, as declared by the `n.*` field, instead of the string fallback type + auto schema = coll1->get_fields(); + ASSERT_EQ("n_age", schema[3].name); + ASSERT_EQ(field_types::INT32, schema[3].type); + + ASSERT_EQ("rand_str", schema[4].name); + ASSERT_EQ(field_types::STRING, schema[4].type); + + collectionManager.drop_collection("coll1"); +} + TEST_F(CollectionAllFieldsTest, 
ContainingWildcardOnlyField) { Collection *coll1;