Fix type detection in fields with regexp name.

This commit is contained in:
Kishore Nallan 2021-10-05 15:13:11 +05:30
parent 1ad6619267
commit ccee94375e
2 changed files with 108 additions and 25 deletions

View File

@ -2242,6 +2242,7 @@ Option<bool> Collection::check_and_update_schema(nlohmann::json& document, const
if(std::regex_match (kv.key(), std::regex(dynamic_field.name))) {
new_field = dynamic_field;
new_field.name = fname;
new_field.type = dynamic_field.type;
found_dynamic_field = true;
break;
}
@ -2253,34 +2254,37 @@ Option<bool> Collection::check_and_update_schema(nlohmann::json& document, const
continue;
}
// detect the actual type
if(fallback_field_type.empty() || fallback_field_type == field_types::AUTO ||
field_types::is_string_or_array(fallback_field_type)) {
if(!found_dynamic_field ||
new_field.type == field_types::AUTO || field_types::is_string_or_array(new_field.type)) {
parseable = field::get_type(kv.value(), field_type);
if(!parseable) {
if(dirty_values == DIRTY_VALUES::REJECT || dirty_values == DIRTY_VALUES::COERCE_OR_REJECT) {
return Option<bool>(400, "Type of field `" + kv.key() + "` is invalid.");
} else {
// DROP or COERCE_OR_DROP
kv = document.erase(kv);
continue;
// detect the actual type
if(!found_dynamic_field && fallback_field_type != field_types::AUTO &&
!field_types::is_string_or_array(fallback_field_type)) {
new_field.type = fallback_field_type;
} else {
parseable = field::get_type(kv.value(), field_type);
if(!parseable) {
if(dirty_values == DIRTY_VALUES::REJECT || dirty_values == DIRTY_VALUES::COERCE_OR_REJECT) {
return Option<bool>(400, "Type of field `" + kv.key() + "` is invalid.");
} else {
// DROP or COERCE_OR_DROP
kv = document.erase(kv);
continue;
}
}
new_field.type = field_type;
if (field_types::is_string_or_array(fallback_field_type)) {
// Supporting single/array field detection only for strings,
// as it does not seem to be too useful for other field types.
if (new_field.is_array()) {
new_field.type = field_types::STRING_ARRAY;
} else {
new_field.type = field_types::STRING;
}
}
}
new_field.type = field_type;
if (field_types::is_string_or_array(fallback_field_type)) {
// Supporting single/array field detection only for strings, as it does not seem to be too useful for
// other field types.
if (new_field.is_array()) {
new_field.type = field_types::STRING_ARRAY;
} else {
new_field.type = field_types::STRING;
}
}
} else {
new_field.type = fallback_field_type;
}
if(!new_field.index) {

View File

@ -821,6 +821,47 @@ TEST_F(CollectionAllFieldsTest, WildcardFacetFieldsWithoutAutoSchema) {
collectionManager.drop_collection("coll1");
}
// Checks that document keys matched by regexp-named fields ("i.*", "s.*", "a.*")
// are added to the schema with the type declared on the matching regexp field,
// even though the values supplied here (a numeric string, a null and an empty
// brace-initialized value) do not carry that type themselves.
TEST_F(CollectionAllFieldsTest, RegexpExplicitFieldTypeCoercion) {
    Collection *coll1;

    std::vector<field> fields = {field("title", field_types::STRING, true),
                                 field("i.*", field_types::INT32, false, true),
                                 field("s.*", field_types::STRING, false, true),
                                 field("a.*", field_types::STRING_ARRAY, false, true),};

    coll1 = collectionManager.get_collection("coll1").get();
    if (coll1 == nullptr) {
        // Check the creation result before unwrapping it, so that a rejected schema
        // fails the test here instead of surfacing later as a null collection pointer.
        auto create_op = collectionManager.create_collection("coll1", 1, fields, "", 0);
        ASSERT_TRUE(create_op.ok());
        coll1 = create_op.get();
    }

    nlohmann::json doc;
    doc["title"] = "Rand Building";
    doc["i_age"] = "28";
    doc["s_name"] = nullptr;
    doc["a_name"] = {};

    // should coerce while retaining expected type
    auto add_op = coll1->add(doc.dump(), CREATE);
    ASSERT_TRUE(add_op.ok());

    auto schema = coll1->get_fields();

    // Indices 4..6 are read below: guard the size first so a regression produces an
    // assertion failure rather than an out-of-range access.
    ASSERT_GE(schema.size(), 7u);

    ASSERT_EQ("a_name", schema[4].name);
    ASSERT_EQ(field_types::STRING_ARRAY, schema[4].type);

    ASSERT_EQ("i_age", schema[5].name);
    ASSERT_EQ(field_types::INT32, schema[5].type);

    ASSERT_EQ("s_name", schema[6].name);
    ASSERT_EQ(field_types::STRING, schema[6].type);

    // The string "28" must be filterable as a number through the `i_age` field.
    auto res_op = coll1->search("rand", {"title"}, "i_age: 28", {}, sort_fields, {0}, 10, 1, FREQUENCY, {false});
    ASSERT_TRUE(res_op.ok());

    auto results = res_op.get();
    ASSERT_EQ(1, results["hits"].size());

    collectionManager.drop_collection("coll1");
}
TEST_F(CollectionAllFieldsTest, DynamicFieldsMustOnlyBeOptional) {
Collection *coll1;
@ -890,6 +931,11 @@ TEST_F(CollectionAllFieldsTest, BothFallbackAndDynamicFields) {
auto add_op = coll1->add(doc.dump(), CREATE);
ASSERT_TRUE(add_op.ok());
// org_year should be of type int32
auto schema = coll1->get_fields();
ASSERT_EQ("org_year", schema[5].name);
ASSERT_EQ(field_types::INT32, schema[5].type);
auto res_op = coll1->search("Amazon", {"org_name"}, "", {"org_name"}, sort_fields, {0}, 10, 1, FREQUENCY, {false});
ASSERT_FALSE(res_op.ok());
ASSERT_EQ("Could not find a facet field named `org_name` in the schema.", res_op.error());
@ -906,6 +952,39 @@ TEST_F(CollectionAllFieldsTest, BothFallbackAndDynamicFields) {
collectionManager.drop_collection("coll1");
}
// With a string fallback field type plus both an int32 regexp field ("n.*") and a
// ".*" string field in the schema, a key matching "n.*" must be typed int32 while
// any other new key ends up as a string.
TEST_F(CollectionAllFieldsTest, RegexpIntFieldWithFallbackStringType) {
    // Document to index: one key matching "n.*" and one that only matches ".*".
    nlohmann::json record;
    record["title"] = "Amazon Inc.";
    record["n_age"] = 32;
    record["rand_str"] = "fizzbuzz";

    std::vector<field> schema_fields = {field("title", field_types::STRING, true),
                                        field("n.*", field_types::INT32, false, true),
                                        field(".*", field_types::STRING, false, true)};

    // Reuse the collection if it already exists, otherwise create it.
    Collection *coll1 = collectionManager.get_collection("coll1").get();
    if (coll1 == nullptr) {
        auto create_op = collectionManager.create_collection("coll1", 1, schema_fields, "", 0, field_types::STRING);
        ASSERT_TRUE(create_op.ok());
        coll1 = create_op.get();
    }

    auto insert_op = coll1->add(record.dump(), CREATE);
    ASSERT_TRUE(insert_op.ok());

    auto detected = coll1->get_fields();

    // n_age should be of type int32
    ASSERT_EQ("n_age", detected[3].name);
    ASSERT_EQ(field_types::INT32, detected[3].type);

    // rand_str is not matched by "n.*" and is expected to be a string
    ASSERT_EQ("rand_str", detected[4].name);
    ASSERT_EQ(field_types::STRING, detected[4].type);

    collectionManager.drop_collection("coll1");
}
TEST_F(CollectionAllFieldsTest, ContainingWildcardOnlyField) {
Collection *coll1;