mirror of
https://github.com/typesense/typesense.git
synced 2025-05-18 04:32:38 +08:00
Explicitly dot separated fields should have precedence.
This commit is contained in:
parent
134af13281
commit
9b4fb12729
@ -386,11 +386,11 @@ struct field {
|
||||
}
|
||||
|
||||
static bool flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_array, bool has_obj_array,
|
||||
const std::string& flat_name, std::vector<field>& flattened_fields);
|
||||
const std::string& flat_name, std::unordered_map<std::string, field>& flattened_fields);
|
||||
|
||||
static bool flatten_field(nlohmann::json& doc, nlohmann::json& obj, const field& the_field,
|
||||
std::vector<std::string>& path_parts, size_t path_index, bool has_array,
|
||||
bool has_obj_array, std::vector<field>& flattened_fields);
|
||||
bool has_obj_array, std::unordered_map<std::string, field>& flattened_fields);
|
||||
|
||||
static Option<bool> flatten_doc(nlohmann::json& document, const std::vector<field>& nested_fields,
|
||||
std::vector<field>& flattened_fields);
|
||||
|
@ -536,7 +536,7 @@ Option<bool> field::json_field_to_field(nlohmann::json& field_json, std::vector<
|
||||
}
|
||||
|
||||
bool field::flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_array, bool has_obj_array,
|
||||
const std::string& flat_name, std::vector<field>& flattened_fields) {
|
||||
const std::string& flat_name, std::unordered_map<std::string, field>& flattened_fields) {
|
||||
if(value.is_object()) {
|
||||
has_obj_array = has_array;
|
||||
for(const auto& kv: value.items()) {
|
||||
@ -546,8 +546,11 @@ bool field::flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_arr
|
||||
for(const auto& kv: value.items()) {
|
||||
flatten_obj(doc, kv.value(), true, has_obj_array, flat_name, flattened_fields);
|
||||
}
|
||||
} else {
|
||||
// must be a primitive
|
||||
} else { // must be a primitive
|
||||
if(doc.count(flat_name) != 0 && flattened_fields.find(flat_name) == flattened_fields.end()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if(has_array) {
|
||||
doc[flat_name].push_back(value);
|
||||
} else {
|
||||
@ -567,7 +570,7 @@ bool field::flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_arr
|
||||
field flattened_field(flat_name, detected_type, false, true);
|
||||
flattened_field.nested = true;
|
||||
flattened_field.nested_array = has_obj_array;
|
||||
flattened_fields.push_back(flattened_field);
|
||||
flattened_fields[flat_name] = flattened_field;
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -575,7 +578,7 @@ bool field::flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_arr
|
||||
|
||||
bool field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, const field& the_field,
|
||||
std::vector<std::string>& path_parts, size_t path_index,
|
||||
bool has_array, bool has_obj_array, std::vector<field>& flattened_fields) {
|
||||
bool has_array, bool has_obj_array, std::unordered_map<std::string, field>& flattened_fields) {
|
||||
if(path_index == path_parts.size()) {
|
||||
// end of path: check if obj matches expected type
|
||||
std::string detected_type;
|
||||
@ -590,10 +593,21 @@ bool field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, const field&
|
||||
|
||||
has_obj_array = has_obj_array || ((detected_type == field_types::OBJECT) && has_array);
|
||||
|
||||
if(detected_type == the_field.type) {
|
||||
// handle differences in detection of numerical types
|
||||
bool is_numericaly_valid = (detected_type != the_field.type) &&
|
||||
((detected_type == field_types::INT64 &&
|
||||
(the_field.type == field_types::INT32 || the_field.type == field_types::FLOAT)) ||
|
||||
(detected_type == field_types::INT64_ARRAY &&
|
||||
(the_field.type == field_types::INT32_ARRAY || the_field.type == field_types::FLOAT_ARRAY)));
|
||||
|
||||
if(detected_type == the_field.type || is_numericaly_valid) {
|
||||
if(the_field.is_object()) {
|
||||
flatten_obj(doc, obj, has_array, has_obj_array, the_field.name, flattened_fields);
|
||||
} else {
|
||||
if(doc.count(the_field.name) != 0 && flattened_fields.find(the_field.name) == flattened_fields.end()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if(has_array) {
|
||||
doc[the_field.name].push_back(obj);
|
||||
} else {
|
||||
@ -603,30 +617,9 @@ bool field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, const field&
|
||||
field flattened_field(the_field.name, detected_type, false, true);
|
||||
flattened_field.nested = (path_index > 1);
|
||||
flattened_field.nested_array = has_obj_array;
|
||||
flattened_fields.push_back(flattened_field);
|
||||
flattened_fields[the_field.name] = flattened_field;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// handle differences in detection of numerical types
|
||||
bool is_numericaly_valid = (detected_type == field_types::INT64 && (the_field.type == field_types::INT32 ||
|
||||
the_field.type == field_types::FLOAT)) ||
|
||||
(detected_type == field_types::INT64_ARRAY &&
|
||||
(the_field.type == field_types::INT32_ARRAY ||
|
||||
the_field.type == field_types::FLOAT_ARRAY));
|
||||
|
||||
if(is_numericaly_valid) {
|
||||
if(has_array) {
|
||||
doc[the_field.name].push_back(obj);
|
||||
} else {
|
||||
doc[the_field.name] = obj;
|
||||
}
|
||||
|
||||
field flattened_field(the_field.name, the_field.type, false, true);
|
||||
flattened_field.nested = (path_index > 1);
|
||||
flattened_field.nested_array = has_obj_array;
|
||||
flattened_fields.push_back(flattened_field);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
@ -657,22 +650,27 @@ Option<bool> field::flatten_doc(nlohmann::json& document,
|
||||
const std::vector<field>& nested_fields,
|
||||
std::vector<field>& flattened_fields) {
|
||||
|
||||
std::unordered_map<std::string, field> flattened_fields_map;
|
||||
|
||||
for(auto& nested_field: nested_fields) {
|
||||
std::vector<std::string> field_parts;
|
||||
StringUtils::split(nested_field.name, field_parts, ".");
|
||||
|
||||
bool resolved = flatten_field(document, document, nested_field, field_parts, 0, false, false, flattened_fields);
|
||||
if(field_parts.size() > 1 && document.count(nested_field.name) != 0) {
|
||||
// skip explicitly present nested fields
|
||||
continue;
|
||||
}
|
||||
|
||||
bool resolved = flatten_field(document, document, nested_field, field_parts, 0, false, false, flattened_fields_map);
|
||||
if(!resolved && !nested_field.optional) {
|
||||
return Option<bool>(400, "Field `" + nested_field.name + "` was not found or has an incorrect type.");
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(flattened_fields.begin(), flattened_fields.end());
|
||||
flattened_fields.erase(std::unique(flattened_fields.begin(), flattened_fields.end()), flattened_fields.end());
|
||||
|
||||
document[".flat"] = nlohmann::json::array();
|
||||
for(auto& f: flattened_fields) {
|
||||
document[".flat"].push_back(f.name);
|
||||
for(auto& kv: flattened_fields_map) {
|
||||
document[".flat"].push_back(kv.second.name);
|
||||
flattened_fields.push_back(kv.second);
|
||||
}
|
||||
|
||||
return Option<bool>(true);
|
||||
|
@ -1112,8 +1112,8 @@ TEST_F(CollectionAllFieldsTest, WildcardFieldAndDictionaryField) {
|
||||
ASSERT_EQ(4, schema.size());
|
||||
ASSERT_EQ(".*", schema[0].name);
|
||||
ASSERT_EQ("year", schema[1].name);
|
||||
ASSERT_EQ("kinds.CGXX", schema[2].name);
|
||||
ASSERT_EQ("kinds.ZBXX", schema[3].name);
|
||||
ASSERT_EQ("kinds.ZBXX", schema[2].name);
|
||||
ASSERT_EQ("kinds.CGXX", schema[3].name);
|
||||
|
||||
// filter on object key
|
||||
results = coll1->search("*", {}, "kinds.CGXX: 13", {}, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
|
||||
|
@ -65,8 +65,8 @@ TEST_F(CollectionNestedFieldsTest, FlattenJSONObject) {
|
||||
|
||||
auto expected_json = R"(
|
||||
{
|
||||
".flat": ["locations.address.city","locations.address.products","locations.address.street",
|
||||
"locations.country","locations.pincode"],
|
||||
".flat": ["locations.pincode","locations.country","locations.address.street","locations.address.products",
|
||||
"locations.address.city"],
|
||||
"company":{"name":"nike"},
|
||||
"employees":{"num":1200},
|
||||
"locations":[
|
||||
@ -132,7 +132,7 @@ TEST_F(CollectionNestedFieldsTest, FlattenJSONObject) {
|
||||
|
||||
expected_json = R"(
|
||||
{
|
||||
".flat": ["locations.address.city", "locations.address.products", "locations.address.street"],
|
||||
".flat": ["locations.address.street", "locations.address.products","locations.address.city"],
|
||||
"company":{"name":"nike"},
|
||||
"employees":{"num":1200},
|
||||
"locations":[
|
||||
@ -236,6 +236,7 @@ TEST_F(CollectionNestedFieldsTest, TestNestedArrayField) {
|
||||
|
||||
// test against deep paths
|
||||
flattened_fields.clear();
|
||||
doc = nlohmann::json::parse(json_str);
|
||||
nested_fields = {
|
||||
field("employees.details.num_tags", field_types::INT32_ARRAY, false),
|
||||
field("employees.details.tags", field_types::STRING_ARRAY, false),
|
||||
@ -248,10 +249,10 @@ TEST_F(CollectionNestedFieldsTest, TestNestedArrayField) {
|
||||
ASSERT_EQ("employees.detail.tags",flattened_fields[0].name);
|
||||
ASSERT_FALSE(flattened_fields[0].nested_array);
|
||||
|
||||
ASSERT_EQ("employees.details.num_tags",flattened_fields[1].name);
|
||||
ASSERT_EQ("employees.details.tags",flattened_fields[1].name);
|
||||
ASSERT_TRUE(flattened_fields[1].nested_array);
|
||||
|
||||
ASSERT_EQ("employees.details.tags",flattened_fields[2].name);
|
||||
ASSERT_EQ("employees.details.num_tags",flattened_fields[2].name);
|
||||
ASSERT_TRUE(flattened_fields[2].nested_array);
|
||||
}
|
||||
|
||||
@ -1117,6 +1118,113 @@ TEST_F(CollectionNestedFieldsTest, VerifyDisableOfNestedFields) {
|
||||
ASSERT_EQ(2, coll2->get_fields().size());
|
||||
}
|
||||
|
||||
// Checks which value is searchable when a document contains BOTH a nested object/array
// (e.g. `company: {num_employees: ...}`) and a top-level key spelled with the same dotted path
// (e.g. `"company.num_employees": ...`) holding a different value. Every assertion below expects
// the value from the explicit dotted key to match and the value from the nested object not to.
// The scenario is run twice: first against a wildcard-only schema, then against an explicit schema.
// NOTE(review): "Precendence" in the test name is a misspelling of "Precedence".
TEST_F(CollectionNestedFieldsTest, ExplicitDotSeparatedFieldsShouldHavePrecendence) {
|
||||
// Part 1: collection whose only declared field is the `.*` auto wildcard, with nested fields enabled.
nlohmann::json schema = R"({
|
||||
"name": "coll1",
|
||||
"enable_nested_fields": true,
|
||||
"fields": [
|
||||
{"name": ".*", "type": "auto"}
|
||||
]
|
||||
})"_json;
|
||||
|
||||
auto op = collectionManager.create_collection(schema);
|
||||
ASSERT_TRUE(op.ok());
|
||||
Collection* coll1 = op.get();
|
||||
|
||||
// Each dotted key below collides with a path inside `company` / `details` but carries a different value.
auto doc1 = R"({
|
||||
"company": {"num_employees": 1000, "ids": [1,2]},
|
||||
"details": [{"name": "bar"}],
|
||||
"company.num_employees": 2000,
|
||||
"company.ids": [10],
|
||||
"details.name": "foo"
|
||||
})"_json;
|
||||
|
||||
ASSERT_TRUE(coll1->add(doc1.dump(), CREATE).ok());
|
||||
// `fs` is not referenced again in this test; the assertion below calls get_fields() a second time.
auto fs = coll1->get_fields();
|
||||
// Expects 4 fields after indexing doc1 -- presumably the wildcard plus the three dotted fields; TODO confirm.
ASSERT_EQ(4, coll1->get_fields().size());
|
||||
|
||||
// simple nested object
// (filter on the explicit value 2000 must match; the nested object's 1000 must not)
|
||||
auto results = coll1->search("*", {}, "company.num_employees: 2000", {}, sort_fields, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}).get();
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
|
||||
results = coll1->search("*", {}, "company.num_employees: 1000", {}, sort_fields, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}).get();
|
||||
ASSERT_EQ(0, results["found"].get<size_t>());
|
||||
|
||||
// nested array object
// (query "foo" from the explicit key must match on `details.name`; "bar" from the nested array must not)
|
||||
results = coll1->search("foo", {"details.name"}, "", {}, sort_fields, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}).get();
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
|
||||
results = coll1->search("bar", {"details.name"}, "", {}, sort_fields, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}).get();
|
||||
ASSERT_EQ(0, results["found"].get<size_t>());
|
||||
|
||||
// nested simple array
// (filter on the explicit array element 10 must match; element 1 from the nested `ids` must not)
|
||||
results = coll1->search("*", {}, "company.ids: 10", {}, sort_fields, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}).get();
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
|
||||
results = coll1->search("*", {}, "company.ids: 1", {}, sort_fields, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}).get();
|
||||
ASSERT_EQ(0, results["found"].get<size_t>());
|
||||
|
||||
// WITH EXPLICIT SCHEMA
// Part 2: same scenario, but the three dotted paths are declared up front with concrete types
// and there is no wildcard field.
|
||||
|
||||
schema = R"({
|
||||
"name": "coll2",
|
||||
"enable_nested_fields": true,
|
||||
"fields": [
|
||||
{"name": "company.num_employees", "type": "int32"},
|
||||
{"name": "company.ids", "type": "int32[]"},
|
||||
{"name": "details.name", "type": "string[]"}
|
||||
]
|
||||
})"_json;
|
||||
|
||||
op = collectionManager.create_collection(schema);
|
||||
ASSERT_TRUE(op.ok());
|
||||
Collection* coll2 = op.get();
|
||||
|
||||
// Same document as doc1 except that "details.name" is given as an array (["foo"]) here,
// in line with the `string[]` type declared for it in the schema above.
auto doc2 = R"({
|
||||
"company": {"num_employees": 1000, "ids": [1,2]},
|
||||
"details": [{"name": "bar"}],
|
||||
"company.num_employees": 2000,
|
||||
"company.ids": [10],
|
||||
"details.name": ["foo"]
|
||||
})"_json;
|
||||
|
||||
ASSERT_TRUE(coll2->add(doc2.dump(), CREATE).ok());
|
||||
|
||||
// simple nested object
// (same expectations as for coll1: explicit 2000 matches, nested 1000 does not)
|
||||
results = coll2->search("*", {}, "company.num_employees: 2000", {}, sort_fields, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}).get();
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
|
||||
results = coll2->search("*", {}, "company.num_employees: 1000", {}, sort_fields, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}).get();
|
||||
ASSERT_EQ(0, results["found"].get<size_t>());
|
||||
|
||||
// nested array object
// (explicit "foo" matches, nested "bar" does not)
|
||||
results = coll2->search("foo", {"details.name"}, "", {}, sort_fields, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}).get();
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
|
||||
results = coll2->search("bar", {"details.name"}, "", {}, sort_fields, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}).get();
|
||||
ASSERT_EQ(0, results["found"].get<size_t>());
|
||||
|
||||
// nested simple array
// (explicit element 10 matches, nested element 1 does not)
|
||||
results = coll2->search("*", {}, "company.ids: 10", {}, sort_fields, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}).get();
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
|
||||
results = coll2->search("*", {}, "company.ids: 1", {}, sort_fields, {0}, 10, 1,
|
||||
token_ordering::FREQUENCY, {true}).get();
|
||||
ASSERT_EQ(0, results["found"].get<size_t>());
|
||||
|
||||
}
|
||||
|
||||
TEST_F(CollectionNestedFieldsTest, GroupByOnNestedFieldsWithWildcardSchema) {
|
||||
std::vector<field> fields = {field(".*", field_types::AUTO, false, true),
|
||||
field("education.name", field_types::STRING_ARRAY, true, true),
|
||||
@ -1189,6 +1297,36 @@ TEST_F(CollectionNestedFieldsTest, GroupByOnNestedFieldsWithWildcardSchema) {
|
||||
ASSERT_EQ("0", results["grouped_hits"][1]["hits"][0]["document"]["id"].get<std::string>());
|
||||
}
|
||||
|
||||
// Checks that a schema combining the `.*` auto wildcard with explicitly typed nested paths
// (`company.id`, `studies.year`) accepts a document that supplies those values only inside
// nested objects, and that filtering on each declared dotted path finds that document.
TEST_F(CollectionNestedFieldsTest, WildcardWithExplicitSchema) {
|
||||
// Wildcard auto field plus two explicitly typed nested paths; nested fields enabled.
nlohmann::json schema = R"({
|
||||
"name": "coll1",
|
||||
"enable_nested_fields": true,
|
||||
"fields": [
|
||||
{"name": ".*", "type": "auto"},
|
||||
{"name": "company.id", "type": "int32"},
|
||||
{"name": "studies.year", "type": "int32[]"}
|
||||
]
|
||||
})"_json;
|
||||
|
||||
auto op = collectionManager.create_collection(schema);
|
||||
ASSERT_TRUE(op.ok());
|
||||
Collection* coll1 = op.get();
|
||||
|
||||
// The document has no top-level dotted keys: `company` is an object and `studies` is an array of objects.
auto doc1 = R"({
|
||||
"id": "0",
|
||||
"company": {"id": 1000, "name": "Foo"},
|
||||
"studies": [{"name": "College 1", "year": 1997}]
|
||||
})"_json;
|
||||
|
||||
ASSERT_TRUE(coll1->add(doc1.dump(), CREATE).ok());
|
||||
|
||||
// Filter on the value nested inside the `company` object; exactly one document is expected.
auto results = coll1->search("*", {}, "company.id: 1000", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
|
||||
// Filter on the value nested inside the `studies` array of objects; exactly one document is expected.
results = coll1->search("*", {}, "studies.year: 1997", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
}
|
||||
|
||||
TEST_F(CollectionNestedFieldsTest, UpdateOfNestFields) {
|
||||
nlohmann::json schema = R"({
|
||||
"name": "coll1",
|
||||
|
Loading…
x
Reference in New Issue
Block a user