diff --git a/include/field.h b/include/field.h index 6d53d50c..ebe57f94 100644 --- a/include/field.h +++ b/include/field.h @@ -386,11 +386,11 @@ struct field { } static bool flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_array, bool has_obj_array, - const std::string& flat_name, std::vector& flattened_fields); + const std::string& flat_name, std::unordered_map& flattened_fields); static bool flatten_field(nlohmann::json& doc, nlohmann::json& obj, const field& the_field, std::vector& path_parts, size_t path_index, bool has_array, - bool has_obj_array, std::vector& flattened_fields); + bool has_obj_array, std::unordered_map& flattened_fields); static Option flatten_doc(nlohmann::json& document, const std::vector& nested_fields, std::vector& flattened_fields); diff --git a/src/field.cpp b/src/field.cpp index 10fb1903..ee3d5811 100644 --- a/src/field.cpp +++ b/src/field.cpp @@ -536,7 +536,7 @@ Option field::json_field_to_field(nlohmann::json& field_json, std::vector< } bool field::flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_array, bool has_obj_array, - const std::string& flat_name, std::vector& flattened_fields) { + const std::string& flat_name, std::unordered_map& flattened_fields) { if(value.is_object()) { has_obj_array = has_array; for(const auto& kv: value.items()) { @@ -546,8 +546,11 @@ bool field::flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_arr for(const auto& kv: value.items()) { flatten_obj(doc, kv.value(), true, has_obj_array, flat_name, flattened_fields); } - } else { - // must be a primitive + } else { // must be a primitive + if(doc.count(flat_name) != 0 && flattened_fields.find(flat_name) == flattened_fields.end()) { + return true; + } + if(has_array) { doc[flat_name].push_back(value); } else { @@ -567,7 +570,7 @@ bool field::flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_arr field flattened_field(flat_name, detected_type, false, true); flattened_field.nested = true; flattened_field.nested_array = has_obj_array; - flattened_fields.push_back(flattened_field); + flattened_fields[flat_name] = flattened_field; } return true; @@ -575,7 +578,7 @@ bool field::flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_arr bool field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, const field& the_field, std::vector& path_parts, size_t path_index, - bool has_array, bool has_obj_array, std::vector& flattened_fields) { + bool has_array, bool has_obj_array, std::unordered_map& flattened_fields) { if(path_index == path_parts.size()) { // end of path: check if obj matches expected type std::string detected_type; @@ -590,10 +593,21 @@ bool field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, const field& has_obj_array = has_obj_array || ((detected_type == field_types::OBJECT) && has_array); - if(detected_type == the_field.type) { + // handle differences in detection of numerical types + bool is_numericaly_valid = (detected_type != the_field.type) && + ((detected_type == field_types::INT64 && + (the_field.type == field_types::INT32 || the_field.type == field_types::FLOAT)) || + (detected_type == field_types::INT64_ARRAY && + (the_field.type == field_types::INT32_ARRAY || the_field.type == field_types::FLOAT_ARRAY))); + + if(detected_type == the_field.type || is_numericaly_valid) { if(the_field.is_object()) { flatten_obj(doc, obj, has_array, has_obj_array, the_field.name, flattened_fields); } else { + if(doc.count(the_field.name) != 0 && flattened_fields.find(the_field.name) == flattened_fields.end()) { + return true; + } + if(has_array) { doc[the_field.name].push_back(obj); } else { @@ -603,30 +617,9 @@ bool field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, const field& field flattened_field(the_field.name, detected_type, false, true); flattened_field.nested = (path_index > 1); flattened_field.nested_array = has_obj_array; - flattened_fields.push_back(flattened_field); + flattened_fields[the_field.name] = flattened_field; } - return true; - } - - // handle differences in detection of numerical types - bool is_numericaly_valid = (detected_type == field_types::INT64 && (the_field.type == field_types::INT32 || - the_field.type == field_types::FLOAT)) || - (detected_type == field_types::INT64_ARRAY && - (the_field.type == field_types::INT32_ARRAY || - the_field.type == field_types::FLOAT_ARRAY)); - - if(is_numericaly_valid) { - if(has_array) { - doc[the_field.name].push_back(obj); - } else { - doc[the_field.name] = obj; - } - - field flattened_field(the_field.name, the_field.type, false, true); - flattened_field.nested = (path_index > 1); - flattened_field.nested_array = has_obj_array; - flattened_fields.push_back(flattened_field); return true; } else { return false; @@ -657,22 +650,27 @@ Option field::flatten_doc(nlohmann::json& document, const std::vector& nested_fields, std::vector& flattened_fields) { + std::unordered_map flattened_fields_map; + for(auto& nested_field: nested_fields) { std::vector field_parts; StringUtils::split(nested_field.name, field_parts, "."); - bool resolved = flatten_field(document, document, nested_field, field_parts, 0, false, false, flattened_fields); + if(field_parts.size() > 1 && document.count(nested_field.name) != 0) { + // skip explicitly present nested fields + continue; + } + + bool resolved = flatten_field(document, document, nested_field, field_parts, 0, false, false, flattened_fields_map); if(!resolved && !nested_field.optional) { return Option(400, "Field `" + nested_field.name + "` was not found or has an incorrect type."); } } - std::sort(flattened_fields.begin(), flattened_fields.end()); - flattened_fields.erase(std::unique(flattened_fields.begin(), flattened_fields.end()), flattened_fields.end()); - document[".flat"] = nlohmann::json::array(); - for(auto& f: flattened_fields) { - document[".flat"].push_back(f.name); + for(auto& kv: flattened_fields_map) { + document[".flat"].push_back(kv.second.name); + flattened_fields.push_back(kv.second); } return Option(true); diff --git a/test/collection_all_fields_test.cpp b/test/collection_all_fields_test.cpp index 0deb0a86..8b409e3f 100644 --- a/test/collection_all_fields_test.cpp +++ b/test/collection_all_fields_test.cpp @@ -1112,8 +1112,8 @@ TEST_F(CollectionAllFieldsTest, WildcardFieldAndDictionaryField) { ASSERT_EQ(4, schema.size()); ASSERT_EQ(".*", schema[0].name); ASSERT_EQ("year", schema[1].name); - ASSERT_EQ("kinds.CGXX", schema[2].name); - ASSERT_EQ("kinds.ZBXX", schema[3].name); + ASSERT_EQ("kinds.ZBXX", schema[2].name); + ASSERT_EQ("kinds.CGXX", schema[3].name); // filter on object key results = coll1->search("*", {}, "kinds.CGXX: 13", {}, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); diff --git a/test/collection_nested_fields_test.cpp b/test/collection_nested_fields_test.cpp index 560696cd..9194b12a 100644 --- a/test/collection_nested_fields_test.cpp +++ b/test/collection_nested_fields_test.cpp @@ -65,8 +65,8 @@ TEST_F(CollectionNestedFieldsTest, FlattenJSONObject) { auto expected_json = R"( { - ".flat": ["locations.address.city","locations.address.products","locations.address.street", - "locations.country","locations.pincode"], + ".flat": ["locations.pincode","locations.country","locations.address.street","locations.address.products", + "locations.address.city"], "company":{"name":"nike"}, "employees":{"num":1200}, "locations":[ @@ -132,7 +132,7 @@ TEST_F(CollectionNestedFieldsTest, FlattenJSONObject) { expected_json = R"( { - ".flat": ["locations.address.city", "locations.address.products", "locations.address.street"], + ".flat": ["locations.address.street", "locations.address.products","locations.address.city"], "company":{"name":"nike"}, "employees":{"num":1200}, "locations":[ @@ -236,6 +236,7 @@ TEST_F(CollectionNestedFieldsTest, TestNestedArrayField) { // test against deep paths flattened_fields.clear(); + doc = nlohmann::json::parse(json_str); nested_fields = { field("employees.details.num_tags", field_types::INT32_ARRAY, false), field("employees.details.tags", field_types::STRING_ARRAY, false), @@ -248,10 +249,10 @@ TEST_F(CollectionNestedFieldsTest, TestNestedArrayField) { ASSERT_EQ("employees.detail.tags",flattened_fields[0].name); ASSERT_FALSE(flattened_fields[0].nested_array); - ASSERT_EQ("employees.details.num_tags",flattened_fields[1].name); + ASSERT_EQ("employees.details.tags",flattened_fields[1].name); ASSERT_TRUE(flattened_fields[1].nested_array); - ASSERT_EQ("employees.details.tags",flattened_fields[2].name); + ASSERT_EQ("employees.details.num_tags",flattened_fields[2].name); ASSERT_TRUE(flattened_fields[2].nested_array); } @@ -1117,6 +1118,113 @@ TEST_F(CollectionNestedFieldsTest, VerifyDisableOfNestedFields) { ASSERT_EQ(2, coll2->get_fields().size()); } +TEST_F(CollectionNestedFieldsTest, ExplicitDotSeparatedFieldsShouldHavePrecendence) { + nlohmann::json schema = R"({ + "name": "coll1", + "enable_nested_fields": true, + "fields": [ + {"name": ".*", "type": "auto"} + ] + })"_json; + + auto op = collectionManager.create_collection(schema); + ASSERT_TRUE(op.ok()); + Collection* coll1 = op.get(); + + auto doc1 = R"({ + "company": {"num_employees": 1000, "ids": [1,2]}, + "details": [{"name": "bar"}], + "company.num_employees": 2000, + "company.ids": [10], + "details.name": "foo" + })"_json; + + ASSERT_TRUE(coll1->add(doc1.dump(), CREATE).ok()); + auto fs = coll1->get_fields(); + ASSERT_EQ(4, coll1->get_fields().size()); + + // simple nested object + auto results = coll1->search("*", {}, "company.num_employees: 2000", {}, sort_fields, {0}, 10, 1, + token_ordering::FREQUENCY, {true}).get(); + ASSERT_EQ(1, results["found"].get()); + + results = coll1->search("*", {}, "company.num_employees: 1000", {}, sort_fields, {0}, 10, 1, + token_ordering::FREQUENCY, {true}).get(); + ASSERT_EQ(0, results["found"].get()); + + // nested array object + results = coll1->search("foo", {"details.name"}, "", {}, sort_fields, {0}, 10, 1, + token_ordering::FREQUENCY, {true}).get(); + ASSERT_EQ(1, results["found"].get()); + + results = coll1->search("bar", {"details.name"}, "", {}, sort_fields, {0}, 10, 1, + token_ordering::FREQUENCY, {true}).get(); + ASSERT_EQ(0, results["found"].get()); + + // nested simple array + results = coll1->search("*", {}, "company.ids: 10", {}, sort_fields, {0}, 10, 1, + token_ordering::FREQUENCY, {true}).get(); + ASSERT_EQ(1, results["found"].get()); + + results = coll1->search("*", {}, "company.ids: 1", {}, sort_fields, {0}, 10, 1, + token_ordering::FREQUENCY, {true}).get(); + ASSERT_EQ(0, results["found"].get()); + + // WITH EXPLICIT SCHEMA + + schema = R"({ + "name": "coll2", + "enable_nested_fields": true, + "fields": [ + {"name": "company.num_employees", "type": "int32"}, + {"name": "company.ids", "type": "int32[]"}, + {"name": "details.name", "type": "string[]"} + ] + })"_json; + + op = collectionManager.create_collection(schema); + ASSERT_TRUE(op.ok()); + Collection* coll2 = op.get(); + + auto doc2 = R"({ + "company": {"num_employees": 1000, "ids": [1,2]}, + "details": [{"name": "bar"}], + "company.num_employees": 2000, + "company.ids": [10], + "details.name": ["foo"] + })"_json; + + ASSERT_TRUE(coll2->add(doc2.dump(), CREATE).ok()); + + // simple nested object + results = coll2->search("*", {}, "company.num_employees: 2000", {}, sort_fields, {0}, 10, 1, + token_ordering::FREQUENCY, {true}).get(); + ASSERT_EQ(1, results["found"].get()); + + results = coll2->search("*", {}, "company.num_employees: 1000", {}, sort_fields, {0}, 10, 1, + token_ordering::FREQUENCY, {true}).get(); + ASSERT_EQ(0, results["found"].get()); + + // nested array object + results = coll2->search("foo", {"details.name"}, "", {}, sort_fields, {0}, 10, 1, + token_ordering::FREQUENCY, {true}).get(); + ASSERT_EQ(1, results["found"].get()); + + results = coll2->search("bar", {"details.name"}, "", {}, sort_fields, {0}, 10, 1, + token_ordering::FREQUENCY, {true}).get(); + ASSERT_EQ(0, results["found"].get()); + + // nested simple array + results = coll2->search("*", {}, "company.ids: 10", {}, sort_fields, {0}, 10, 1, + token_ordering::FREQUENCY, {true}).get(); + ASSERT_EQ(1, results["found"].get()); + + results = coll2->search("*", {}, "company.ids: 1", {}, sort_fields, {0}, 10, 1, + token_ordering::FREQUENCY, {true}).get(); + ASSERT_EQ(0, results["found"].get()); + +} + TEST_F(CollectionNestedFieldsTest, GroupByOnNestedFieldsWithWildcardSchema) { std::vector fields = {field(".*", field_types::AUTO, false, true), field("education.name", field_types::STRING_ARRAY, true, true), @@ -1189,6 +1297,36 @@ TEST_F(CollectionNestedFieldsTest, GroupByOnNestedFieldsWithWildcardSchema) { ASSERT_EQ("0", results["grouped_hits"][1]["hits"][0]["document"]["id"].get()); } +TEST_F(CollectionNestedFieldsTest, WildcardWithExplicitSchema) { + nlohmann::json schema = R"({ + "name": "coll1", + "enable_nested_fields": true, + "fields": [ + {"name": ".*", "type": "auto"}, + {"name": "company.id", "type": "int32"}, + {"name": "studies.year", "type": "int32[]"} + ] + })"_json; + + auto op = collectionManager.create_collection(schema); + ASSERT_TRUE(op.ok()); + Collection* coll1 = op.get(); + + auto doc1 = R"({ + "id": "0", + "company": {"id": 1000, "name": "Foo"}, + "studies": [{"name": "College 1", "year": 1997}] + })"_json; + + ASSERT_TRUE(coll1->add(doc1.dump(), CREATE).ok()); + + auto results = coll1->search("*", {}, "company.id: 1000", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get(); + ASSERT_EQ(1, results["found"].get()); + + results = coll1->search("*", {}, "studies.year: 1997", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get(); + ASSERT_EQ(1, results["found"].get()); +} + TEST_F(CollectionNestedFieldsTest, UpdateOfNestFields) { nlohmann::json schema = R"({ "name": "coll1",