From b825ce752a3593e3be7c509f12ee7b0bc790cccb Mon Sep 17 00:00:00 2001 From: Harpreet Sangar Date: Tue, 23 May 2023 10:09:28 +0530 Subject: [PATCH] Skip exact filtering beyond threshold in case of geo polygon. --- src/filter.cpp | 61 +++++++++++++++++----------------- src/filter_result_iterator.cpp | 18 +++++----- test/geo_filtering_test.cpp | 44 +++++++++++++++--------- 3 files changed, 69 insertions(+), 54 deletions(-) diff --git a/src/filter.cpp b/src/filter.cpp index 3b13b400..5946e5d1 100644 --- a/src/filter.cpp +++ b/src/filter.cpp @@ -172,7 +172,8 @@ Option validate_geofilter_distance(std::string& raw_value, const string& f Option filter::parse_geopoint_filter_value(string& raw_value, const string& format_err_msg, filter& filter_exp) { // FORMAT: - // [ ([48.853, 2.344], radius: 1km, exact_filter_radius: 100km), ([48.8662, 2.3255, 48.8581, 2.3209, 48.8561, 2.3448, 48.8641, 2.3469]) ] + // [ ([48.853, 2.344], radius: 1km, exact_filter_radius: 100km), + // ([48.8662, 2.3255, 48.8581, 2.3209, 48.8561, 2.3448, 48.8641, 2.3469], exact_filter_radius: 100km) ] // Every open parenthesis represent a geo filter value. auto open_parenthesis_count = std::count(raw_value.begin(), raw_value.end(), '('); @@ -181,11 +182,9 @@ Option filter::parse_geopoint_filter_value(string& raw_value, const string } filter_exp.comparators.push_back(LESS_THAN_EQUALS); - bool is_multivalued = raw_value[0] == '['; + bool is_multivalued = open_parenthesis_count > 1; size_t i = is_multivalued; - // Adding polygonal values at last since they don't have any parameters associated with them. - std::vector polygons; for (auto j = 0; j < open_parenthesis_count; j++) { if (is_multivalued) { auto pos = raw_value.find('(', i); @@ -206,57 +205,55 @@ Option filter::parse_geopoint_filter_value(string& raw_value, const string } // [48.853, 2.344], radius: 1km, exact_filter_radius: 100km - // [48.8662, 2.3255, 48.8581, 2.3209, 48.8561, 2.3448, 48.8641, 2.3469] + // [48.8662, 2.3255, 48.8581, 2.3209, 48.8561, 2.3448, 48.8641, 2.3469], exact_filter_radius: 100km std::string value_str = raw_value.substr(i, value_end_index - i); StringUtils::trim(value_str); if (value_str.empty() || value_str[0] != '[' || value_str.find(']', 1) == std::string::npos) { return Option(400, format_err_msg); - } else { - std::vector filter_values; - StringUtils::split(value_str, filter_values, ","); - - if(filter_values.size() < 3) { - return Option(400, format_err_msg); - } } auto points_str = value_str.substr(1, value_str.find(']', 1) - 1); std::vector geo_points; StringUtils::split(points_str, geo_points, ","); - bool is_polygon = value_str.back() == ']'; + if (geo_points.size() < 2 || geo_points.size() % 2) { + return Option(400, format_err_msg); + } + + bool is_polygon = geo_points.size() > 2; for (const auto& geo_point: geo_points) { - if (!StringUtils::is_float(geo_point) || - (!is_polygon && (geo_point == "nan" || geo_point == "NaN"))) { + if (geo_point == "nan" || geo_point == "NaN" || !StringUtils::is_float(geo_point)) { return Option(400, format_err_msg); } } if (is_polygon) { - polygons.push_back(points_str); - continue; + filter_exp.values.push_back(points_str); } // Handle options. // , radius: 1km, exact_filter_radius: 100km - i = raw_value.find(']', i); - i++; + i = raw_value.find(']', i) + 1; std::vector options; StringUtils::split(raw_value.substr(i, value_end_index - i), options, ","); if (options.empty()) { - // Missing radius option - return Option(400, format_err_msg); + if (!is_polygon) { + // Missing radius option + return Option(400, format_err_msg); + } + + nlohmann::json param; + param[EXACT_GEO_FILTER_RADIUS_KEY] = DEFAULT_EXACT_GEO_FILTER_RADIUS_VALUE; + filter_exp.params.push_back(param); + + continue; } bool is_radius_present = false; for (auto const& option: options) { - if (option.empty()) { - continue; - } - std::vector key_value; StringUtils::split(option, key_value, ":"); @@ -264,7 +261,7 @@ Option filter::parse_geopoint_filter_value(string& raw_value, const string continue; } - if (key_value[0] == GEO_FILTER_RADIUS_KEY) { + if (key_value[0] == GEO_FILTER_RADIUS_KEY && !is_polygon) { is_radius_present = true; std::string distance, unit; @@ -293,10 +290,16 @@ Option filter::parse_geopoint_filter_value(string& raw_value, const string nlohmann::json param; param[EXACT_GEO_FILTER_RADIUS_KEY] = exact_under_radius; filter_exp.params.push_back(param); + + // Only EXACT_GEO_FILTER_RADIUS_KEY option would be present for a polygon. We can also stop if we've + // parsed the radius in case of a single geopoint since there are only two options. + if (is_polygon || is_radius_present) { + break; + } } } - if (!is_radius_present) { + if (!is_radius_present && !is_polygon) { return Option(400, format_err_msg); } @@ -308,10 +311,6 @@ Option filter::parse_geopoint_filter_value(string& raw_value, const string } } - for (auto const& polygon: polygons) { - filter_exp.values.push_back(polygon); - } - return Option(true); } diff --git a/src/filter_result_iterator.cpp b/src/filter_result_iterator.cpp index d9b56683..a6b61134 100644 --- a/src/filter_result_iterator.cpp +++ b/src/filter_result_iterator.cpp @@ -765,7 +765,7 @@ void filter_result_iterator_t::init() { bool is_polygon = StringUtils::is_float(filter_value_parts.back()); S2Region* query_region; - double radius; + double query_radius_meters; if (is_polygon) { const int num_verts = int(filter_value_parts.size()) / 2; std::vector vertices; @@ -790,22 +790,24 @@ void filter_result_iterator_t::init() { } else { query_region = loop; } + + query_radius_meters = S2Earth::RadiansToMeters(query_region->GetCapBound().GetRadius().radians()); } else { - radius = std::stof(filter_value_parts[2]); + query_radius_meters = std::stof(filter_value_parts[2]); const auto& unit = filter_value_parts[3]; if (unit == "km") { - radius *= 1000; + query_radius_meters *= 1000; } else { // assume "mi" (validated upstream) - radius *= 1609.34; + query_radius_meters *= 1609.34; } - S1Angle query_radius = S1Angle::Radians(S2Earth::MetersToRadians(radius)); + S1Angle query_radius_radians = S1Angle::Radians(S2Earth::MetersToRadians(query_radius_meters)); double query_lat = std::stod(filter_value_parts[0]); double query_lng = std::stod(filter_value_parts[1]); S2Point center = S2LatLng::FromDegrees(query_lat, query_lng).ToPoint(); - query_region = new S2Cap(center, query_radius); + query_region = new S2Cap(center, query_radius_radians); } std::unique_ptr query_region_guard(query_region); @@ -825,8 +827,8 @@ void filter_result_iterator_t::init() { geo_result_ids.erase(std::unique( geo_result_ids.begin(), geo_result_ids.end() ), geo_result_ids.end()); // Skip exact filtering step if query radius is greater than the threshold. - if (!is_polygon && fi < a_filter.params.size() && - radius > a_filter.params[fi][filter::EXACT_GEO_FILTER_RADIUS_KEY].get()) { + if (fi < a_filter.params.size() && + query_radius_meters > a_filter.params[fi][filter::EXACT_GEO_FILTER_RADIUS_KEY].get()) { uint32_t* out = nullptr; filter_result.count = ArrayUtils::or_scalar(geo_result_ids.data(), geo_result_ids.size(), filter_result.docs, filter_result.count, &out); diff --git a/test/geo_filtering_test.cpp b/test/geo_filtering_test.cpp index 9db67c0b..ab200262 100644 --- a/test/geo_filtering_test.cpp +++ b/test/geo_filtering_test.cpp @@ -85,7 +85,7 @@ TEST_F(GeoFilteringTest, GeoPointFiltering) { ASSERT_EQ(1, results["found"].get()); ASSERT_EQ(1, results["hits"].size()); - ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get().c_str()); + ASSERT_EQ("1", results["hits"][0]["document"]["id"].get()); results = coll1->search("*", {}, "loc: [([48.90615, 2.34358], radius: 1 km), ([48.8462, 2.34515], radius: 1 km)]", {}, {}, {0}, 10, 1, FREQUENCY).get(); @@ -115,9 +115,9 @@ TEST_F(GeoFilteringTest, GeoPointFiltering) { ASSERT_EQ(3, results["found"].get()); - ASSERT_STREQ("6", results["hits"][0]["document"]["id"].get().c_str()); - ASSERT_STREQ("5", results["hits"][1]["document"]["id"].get().c_str()); - ASSERT_STREQ("3", results["hits"][2]["document"]["id"].get().c_str()); + ASSERT_EQ("6", results["hits"][0]["document"]["id"].get()); + ASSERT_EQ("5", results["hits"][1]["document"]["id"].get()); + ASSERT_EQ("3", results["hits"][2]["document"]["id"].get()); // when geo query had NaN auto gop = coll1->search("*", {}, "loc: ([NaN, nan], radius: 1 mi)", @@ -206,8 +206,8 @@ TEST_F(GeoFilteringTest, GeoPointArrayFiltering) { ASSERT_EQ(2, results["found"].get()); ASSERT_EQ(2, results["hits"].size()); - ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get().c_str()); - ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get().c_str()); + ASSERT_EQ("1", results["hits"][0]["document"]["id"].get()); + ASSERT_EQ("0", results["hits"][1]["document"]["id"].get()); // Default value of exact_filter_radius is 10km, exact filtering is not performed. results = coll1->search("*", @@ -217,9 +217,9 @@ TEST_F(GeoFilteringTest, GeoPointArrayFiltering) { ASSERT_EQ(3, results["found"].get()); ASSERT_EQ(3, results["hits"].size()); - ASSERT_STREQ("2", results["hits"][0]["document"]["id"].get().c_str()); - ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get().c_str()); - ASSERT_STREQ("0", results["hits"][2]["document"]["id"].get().c_str()); + ASSERT_EQ("2", results["hits"][0]["document"]["id"].get()); + ASSERT_EQ("1", results["hits"][1]["document"]["id"].get()); + ASSERT_EQ("0", results["hits"][2]["document"]["id"].get()); // pick location close to none of the spots results = coll1->search("*", @@ -244,7 +244,7 @@ TEST_F(GeoFilteringTest, GeoPointArrayFiltering) { ASSERT_EQ(1, results["found"].get()); - ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str()); + ASSERT_EQ("0", results["hits"][0]["document"]["id"].get()); collectionManager.drop_collection("coll1"); } @@ -355,9 +355,9 @@ TEST_F(GeoFilteringTest, GeoPolygonFiltering) { ASSERT_EQ(3, results["found"].get()); ASSERT_EQ(3, results["hits"].size()); - ASSERT_STREQ("8", results["hits"][0]["document"]["id"].get().c_str()); - ASSERT_STREQ("4", results["hits"][1]["document"]["id"].get().c_str()); - ASSERT_STREQ("0", results["hits"][2]["document"]["id"].get().c_str()); + ASSERT_EQ("8", results["hits"][0]["document"]["id"].get()); + ASSERT_EQ("4", results["hits"][1]["document"]["id"].get()); + ASSERT_EQ("0", results["hits"][2]["document"]["id"].get()); // should work even if points of polygon are clockwise @@ -389,6 +389,8 @@ TEST_F(GeoFilteringTest, GeoPolygonFilteringSouthAmerica) { std::vector> records = { {"North of Equator", "4.48615, -71.38049"}, {"South of Equator", "-8.48587, -71.02892"}, + {"North of Equator, outside polygon", "4.13377, -56.00459"}, + {"South of Equator, outside polygon", "-4.5041, -57.34523"}, }; for(size_t i=0; iadd(doc.dump()).ok()); } - // pick a polygon that covers both points - + // polygon only covers 2 points but all points are returned since exact filtering is not performed. auto results = coll1->search("*", {}, "loc: ([13.3163, -82.3585, " "-29.134, -82.3585, " @@ -417,9 +418,22 @@ TEST_F(GeoFilteringTest, GeoPolygonFilteringSouthAmerica) { "13.3163, -59.8528])", {}, {}, {0}, 10, 1, FREQUENCY).get(); + ASSERT_EQ(4, results["found"].get()); + ASSERT_EQ(4, results["hits"].size()); + + results = coll1->search("*", + {}, "loc: ([13.3163, -82.3585, " + "-29.134, -82.3585, " + "-29.134, -59.8528, " + "13.3163, -59.8528], exact_filter_radius: 2703km)", + {}, {}, {0}, 10, 1, FREQUENCY).get(); + ASSERT_EQ(2, results["found"].get()); ASSERT_EQ(2, results["hits"].size()); + ASSERT_EQ("1", results["hits"][0]["document"]["id"].get()); + ASSERT_EQ("0", results["hits"][1]["document"]["id"].get()); + collectionManager.drop_collection("coll1"); }