Support `geopoint[]` fields nested inside an array of objects.

A nested value such as {"addresses": [{"geoPoint": [1.91, 23.5]}]} is seen by
the indexer as a flat two-number array rather than an array of [lat, lng]
pairs (see the comments in the index.cpp hunks), so:

  * src/field.cpp: flatten_field accepts a detected FLOAT_ARRAY for a field
    declared as GEOPOINT_ARRAY.
  * src/index.cpp: validate_index_in_memory checks (and, if needed, coerces)
    the flat pair instead of iterating over it; index_field_in_memory indexes
    the flat pair the same way as a single GEOPOINT.
  * test/collection_nested_fields_test.cpp: adds NestedFieldWithGeopointArray.

diff --git a/src/field.cpp b/src/field.cpp
index 16528d88..6bb361c5 100644
--- a/src/field.cpp
+++ b/src/field.cpp
@@ -755,10 +755,14 @@ Option field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, cons
 
         // handle differences in detection of numerical types
         bool is_numericaly_valid = (detected_type != the_field.type) &&
-            ((detected_type == field_types::INT64 &&
-                (the_field.type == field_types::INT32 || the_field.type == field_types::FLOAT)) ||
-             (detected_type == field_types::INT64_ARRAY &&
-                (the_field.type == field_types::INT32_ARRAY || the_field.type == field_types::FLOAT_ARRAY)));
+            (   (detected_type == field_types::INT64 &&
+                (the_field.type == field_types::INT32 || the_field.type == field_types::FLOAT)) ||
+
+                (detected_type == field_types::INT64_ARRAY &&
+                (the_field.type == field_types::INT32_ARRAY || the_field.type == field_types::FLOAT_ARRAY)) ||
+
+                (detected_type == field_types::FLOAT_ARRAY && the_field.type == field_types::GEOPOINT_ARRAY)
+            );
 
         if(detected_type == the_field.type || is_numericaly_valid) {
             if(the_field.is_object()) {
diff --git a/src/index.cpp b/src/index.cpp
index 52b0ea99..26e2a75c 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -499,6 +499,23 @@ Option Index::validate_index_in_memory(nlohmann::json& document, uint3
                 }
 
                 nlohmann::json::iterator it = document[field_name].begin();
+
+                // Handle a geopoint[] type inside an array of objects: it won't be an array of arrays, so cannot iterate
+                // (size is checked before `it` is dereferenced: on an empty array `it` equals end())
+                if(a_field.nested && a_field.type == field_types::GEOPOINT_ARRAY &&
+                   document[field_name].size() == 2 && it->is_number()) {
+                    const auto& item = document[field_name];
+                    if(!(item[0].is_number() && item[1].is_number())) {
+                        // one or more elements is not a number, try to coerce
+                        auto coerce_op = coerce_geopoint(dirty_values, a_field, document, field_name, it, true, array_ele_erased);
+                        if(!coerce_op.ok()) {
+                            return coerce_op;
+                        }
+                    }
+
+                    continue;
+                }
+
                 for(; it != document[field_name].end(); ) {
                     const auto& item = it.value();
                     array_ele_erased = false;
@@ -886,54 +903,54 @@ void Index::index_field_in_memory(const field& afield, std::vector
             bool value = record.doc[afield.name].get<bool>();
             num_tree->insert(value, seq_id);
         });
-    } else if(afield.type == field_types::GEOPOINT) {
-        auto geo_index = geopoint_index.at(afield.name);
-
-        iterate_and_index_numerical_field(iter_batch, afield, [&afield, geo_index]
-                (const index_record& record, uint32_t seq_id) {
-            const std::vector<double>& latlong = record.doc[afield.name];
-
-            S2RegionTermIndexer::Options options;
-            options.set_index_contains_points_only(true);
-            S2RegionTermIndexer indexer(options);
-            S2Point point = S2LatLng::FromDegrees(latlong[0], latlong[1]).ToPoint();
-
-            for(const auto& term: indexer.GetIndexTerms(point, "")) {
-                (*geo_index)[term].push_back(seq_id);
-            }
-        });
-    } else if(afield.type == field_types::GEOPOINT_ARRAY) {
+    } else if(afield.type == field_types::GEOPOINT || afield.type == field_types::GEOPOINT_ARRAY) {
         auto geo_index = geopoint_index.at(afield.name);
 
         iterate_and_index_numerical_field(iter_batch, afield,
-        [&afield, &geo_array_index=geo_array_index, geo_index](const index_record& record, uint32_t seq_id) {
+            [&afield, &geo_array_index=geo_array_index, geo_index](const index_record& record, uint32_t seq_id) {
+            // nested geopoint value inside an array of objects will be a simple array so must be treated as geopoint
+            bool nested_obj_arr_geopoint = (afield.nested && afield.type == field_types::GEOPOINT_ARRAY &&
+                                record.doc[afield.name].size() == 2 && record.doc[afield.name][0].is_number());
 
-            const std::vector<std::vector<double>>& latlongs = record.doc[afield.name];
-            S2RegionTermIndexer::Options options;
-            options.set_index_contains_points_only(true);
-            S2RegionTermIndexer indexer(options);
+            if(afield.type == field_types::GEOPOINT || nested_obj_arr_geopoint) {
+                const std::vector<double>& latlong = record.doc[afield.name];
 
-            int64_t* packed_latlongs = new int64_t[latlongs.size() + 1];
-            packed_latlongs[0] = latlongs.size();
+                S2RegionTermIndexer::Options options;
+                options.set_index_contains_points_only(true);
+                S2RegionTermIndexer indexer(options);
+                S2Point point = S2LatLng::FromDegrees(latlong[0], latlong[1]).ToPoint();
 
-            for(size_t li = 0; li < latlongs.size(); li++) {
-                auto& latlong = latlongs[li];
-                S2Point point = S2LatLng::FromDegrees(latlong[0], latlong[1]).ToPoint();
-                std::set<std::string> terms;
-                for(const auto& term: indexer.GetIndexTerms(point, "")) {
-                    terms.insert(term);
-                }
+                for(const auto& term: indexer.GetIndexTerms(point, "")) {
+                    (*geo_index)[term].push_back(seq_id);
+                }
+            } else {
+                const std::vector<std::vector<double>>& latlongs = record.doc[afield.name];
+                S2RegionTermIndexer::Options options;
+                options.set_index_contains_points_only(true);
+                S2RegionTermIndexer indexer(options);
 
-                for(const auto& term: terms) {
-                    (*geo_index)[term].push_back(seq_id);
-                }
+                int64_t* packed_latlongs = new int64_t[latlongs.size() + 1];
+                packed_latlongs[0] = latlongs.size();
 
-                int64_t packed_latlong = GeoPoint::pack_lat_lng(latlong[0], latlong[1]);
-                packed_latlongs[li + 1] = packed_latlong;
-            }
+                for(size_t li = 0; li < latlongs.size(); li++) {
+                    auto& latlong = latlongs[li];
+                    S2Point point = S2LatLng::FromDegrees(latlong[0], latlong[1]).ToPoint();
+                    std::set<std::string> terms;
+                    for(const auto& term: indexer.GetIndexTerms(point, "")) {
+                        terms.insert(term);
+                    }
 
-            geo_array_index.at(afield.name)->emplace(seq_id, packed_latlongs);
-        });
+                    for(const auto& term: terms) {
+                        (*geo_index)[term].push_back(seq_id);
+                    }
+
+                    int64_t packed_latlong = GeoPoint::pack_lat_lng(latlong[0], latlong[1]);
+                    packed_latlongs[li + 1] = packed_latlong;
+                }
+
+                geo_array_index.at(afield.name)->emplace(seq_id, packed_latlongs);
+            }
+        });
     } else if(afield.is_array()) {
         // handle vector index first
         if(afield.type == field_types::FLOAT_ARRAY && afield.num_dim > 0) {
diff --git a/test/collection_nested_fields_test.cpp b/test/collection_nested_fields_test.cpp
index 87a58145..678b93bb 100644
--- a/test/collection_nested_fields_test.cpp
+++ b/test/collection_nested_fields_test.cpp
@@ -1757,6 +1757,56 @@ TEST_F(CollectionNestedFieldsTest, NestedFieldWithExplicitWeight) {
     ASSERT_EQ(1, results["found"].get<size_t>());
 }
 
+TEST_F(CollectionNestedFieldsTest, NestedFieldWithGeopointArray) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "enable_nested_fields": true,
+        "fields": [
+          {"name": "addresses.geoPoint", "type": "geopoint[]"}
+        ]
+    })"_json;
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection* coll1 = op.get();
+
+    auto doc1 = R"({
+        "addresses": [{"geoPoint": [1.91, 23.5]}]
+    })"_json;
+
+    ASSERT_TRUE(coll1->add(doc1.dump(), CREATE).ok());
+
+    auto results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}, 0).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+
+    // with nested geopoint array
+
+    auto doc2 = R"({
+        "addresses": [{"geoPoint": [[1.91, 23.5]]}]
+    })"_json;
+
+    ASSERT_TRUE(coll1->add(doc2.dump(), CREATE).ok());
+    results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}, 0).get();
+    ASSERT_EQ(2, results["found"].get<size_t>());
+
+    // data validation
+    auto bad_doc = R"({
+        "addresses": [{"geoPoint": [1.91, "x"]}]
+    })"_json;
+
+    auto create_op = coll1->add(bad_doc.dump(), CREATE);
+    ASSERT_FALSE(create_op.ok());
+    ASSERT_EQ("Field `addresses.geoPoint` has an incorrect type.", create_op.error());
+
+    bad_doc = R"({
+        "addresses": [{"geoPoint": [[1.91, "x"]]}]
+    })"_json;
+
+    create_op = coll1->add(bad_doc.dump(), CREATE);
+    ASSERT_FALSE(create_op.ok());
+    ASSERT_EQ("Field `addresses.geoPoint` must be an array of geopoint.", create_op.error());
+}
+
 TEST_F(CollectionNestedFieldsTest, GroupByOnNestedFieldsWithWildcardSchema) {
     std::vector<field> fields = {field(".*", field_types::AUTO, false, true),
                                  field("education.name", field_types::STRING_ARRAY, true, true),