mirror of
https://github.com/typesense/typesense.git
synced 2025-05-25 08:17:38 +08:00
Skip exact filtering beyond threshold in case of geo polygon.
This commit is contained in:
parent
4c2811f46d
commit
b825ce752a
@ -172,7 +172,8 @@ Option<bool> validate_geofilter_distance(std::string& raw_value, const string& f
|
||||
|
||||
Option<bool> filter::parse_geopoint_filter_value(string& raw_value, const string& format_err_msg, filter& filter_exp) {
|
||||
// FORMAT:
|
||||
// [ ([48.853, 2.344], radius: 1km, exact_filter_radius: 100km), ([48.8662, 2.3255, 48.8581, 2.3209, 48.8561, 2.3448, 48.8641, 2.3469]) ]
|
||||
// [ ([48.853, 2.344], radius: 1km, exact_filter_radius: 100km),
|
||||
// ([48.8662, 2.3255, 48.8581, 2.3209, 48.8561, 2.3448, 48.8641, 2.3469], exact_filter_radius: 100km) ]
|
||||
|
||||
// Every open parenthesis represent a geo filter value.
|
||||
auto open_parenthesis_count = std::count(raw_value.begin(), raw_value.end(), '(');
|
||||
@ -181,11 +182,9 @@ Option<bool> filter::parse_geopoint_filter_value(string& raw_value, const string
|
||||
}
|
||||
|
||||
filter_exp.comparators.push_back(LESS_THAN_EQUALS);
|
||||
bool is_multivalued = raw_value[0] == '[';
|
||||
bool is_multivalued = open_parenthesis_count > 1;
|
||||
size_t i = is_multivalued;
|
||||
|
||||
// Adding polygonal values at last since they don't have any parameters associated with them.
|
||||
std::vector<std::string> polygons;
|
||||
for (auto j = 0; j < open_parenthesis_count; j++) {
|
||||
if (is_multivalued) {
|
||||
auto pos = raw_value.find('(', i);
|
||||
@ -206,57 +205,55 @@ Option<bool> filter::parse_geopoint_filter_value(string& raw_value, const string
|
||||
}
|
||||
|
||||
// [48.853, 2.344], radius: 1km, exact_filter_radius: 100km
|
||||
// [48.8662, 2.3255, 48.8581, 2.3209, 48.8561, 2.3448, 48.8641, 2.3469]
|
||||
// [48.8662, 2.3255, 48.8581, 2.3209, 48.8561, 2.3448, 48.8641, 2.3469], exact_filter_radius: 100km
|
||||
std::string value_str = raw_value.substr(i, value_end_index - i);
|
||||
StringUtils::trim(value_str);
|
||||
|
||||
if (value_str.empty() || value_str[0] != '[' || value_str.find(']', 1) == std::string::npos) {
|
||||
return Option<bool>(400, format_err_msg);
|
||||
} else {
|
||||
std::vector<std::string> filter_values;
|
||||
StringUtils::split(value_str, filter_values, ",");
|
||||
|
||||
if(filter_values.size() < 3) {
|
||||
return Option<bool>(400, format_err_msg);
|
||||
}
|
||||
}
|
||||
|
||||
auto points_str = value_str.substr(1, value_str.find(']', 1) - 1);
|
||||
std::vector<std::string> geo_points;
|
||||
StringUtils::split(points_str, geo_points, ",");
|
||||
|
||||
bool is_polygon = value_str.back() == ']';
|
||||
if (geo_points.size() < 2 || geo_points.size() % 2) {
|
||||
return Option<bool>(400, format_err_msg);
|
||||
}
|
||||
|
||||
bool is_polygon = geo_points.size() > 2;
|
||||
for (const auto& geo_point: geo_points) {
|
||||
if (!StringUtils::is_float(geo_point) ||
|
||||
(!is_polygon && (geo_point == "nan" || geo_point == "NaN"))) {
|
||||
if (geo_point == "nan" || geo_point == "NaN" || !StringUtils::is_float(geo_point)) {
|
||||
return Option<bool>(400, format_err_msg);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_polygon) {
|
||||
polygons.push_back(points_str);
|
||||
continue;
|
||||
filter_exp.values.push_back(points_str);
|
||||
}
|
||||
|
||||
// Handle options.
|
||||
// , radius: 1km, exact_filter_radius: 100km
|
||||
i = raw_value.find(']', i);
|
||||
i++;
|
||||
i = raw_value.find(']', i) + 1;
|
||||
|
||||
std::vector<std::string> options;
|
||||
StringUtils::split(raw_value.substr(i, value_end_index - i), options, ",");
|
||||
|
||||
if (options.empty()) {
|
||||
// Missing radius option
|
||||
return Option<bool>(400, format_err_msg);
|
||||
if (!is_polygon) {
|
||||
// Missing radius option
|
||||
return Option<bool>(400, format_err_msg);
|
||||
}
|
||||
|
||||
nlohmann::json param;
|
||||
param[EXACT_GEO_FILTER_RADIUS_KEY] = DEFAULT_EXACT_GEO_FILTER_RADIUS_VALUE;
|
||||
filter_exp.params.push_back(param);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
bool is_radius_present = false;
|
||||
for (auto const& option: options) {
|
||||
if (option.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<std::string> key_value;
|
||||
StringUtils::split(option, key_value, ":");
|
||||
|
||||
@ -264,7 +261,7 @@ Option<bool> filter::parse_geopoint_filter_value(string& raw_value, const string
|
||||
continue;
|
||||
}
|
||||
|
||||
if (key_value[0] == GEO_FILTER_RADIUS_KEY) {
|
||||
if (key_value[0] == GEO_FILTER_RADIUS_KEY && !is_polygon) {
|
||||
is_radius_present = true;
|
||||
|
||||
std::string distance, unit;
|
||||
@ -293,10 +290,16 @@ Option<bool> filter::parse_geopoint_filter_value(string& raw_value, const string
|
||||
nlohmann::json param;
|
||||
param[EXACT_GEO_FILTER_RADIUS_KEY] = exact_under_radius;
|
||||
filter_exp.params.push_back(param);
|
||||
|
||||
// Only EXACT_GEO_FILTER_RADIUS_KEY option would be present for a polygon. We can also stop if we've
|
||||
// parsed the radius in case of a single geopoint since there are only two options.
|
||||
if (is_polygon || is_radius_present) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_radius_present) {
|
||||
if (!is_radius_present && !is_polygon) {
|
||||
return Option<bool>(400, format_err_msg);
|
||||
}
|
||||
|
||||
@ -308,10 +311,6 @@ Option<bool> filter::parse_geopoint_filter_value(string& raw_value, const string
|
||||
}
|
||||
}
|
||||
|
||||
for (auto const& polygon: polygons) {
|
||||
filter_exp.values.push_back(polygon);
|
||||
}
|
||||
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
|
@ -765,7 +765,7 @@ void filter_result_iterator_t::init() {
|
||||
bool is_polygon = StringUtils::is_float(filter_value_parts.back());
|
||||
S2Region* query_region;
|
||||
|
||||
double radius;
|
||||
double query_radius_meters;
|
||||
if (is_polygon) {
|
||||
const int num_verts = int(filter_value_parts.size()) / 2;
|
||||
std::vector<S2Point> vertices;
|
||||
@ -790,22 +790,24 @@ void filter_result_iterator_t::init() {
|
||||
} else {
|
||||
query_region = loop;
|
||||
}
|
||||
|
||||
query_radius_meters = S2Earth::RadiansToMeters(query_region->GetCapBound().GetRadius().radians());
|
||||
} else {
|
||||
radius = std::stof(filter_value_parts[2]);
|
||||
query_radius_meters = std::stof(filter_value_parts[2]);
|
||||
const auto& unit = filter_value_parts[3];
|
||||
|
||||
if (unit == "km") {
|
||||
radius *= 1000;
|
||||
query_radius_meters *= 1000;
|
||||
} else {
|
||||
// assume "mi" (validated upstream)
|
||||
radius *= 1609.34;
|
||||
query_radius_meters *= 1609.34;
|
||||
}
|
||||
|
||||
S1Angle query_radius = S1Angle::Radians(S2Earth::MetersToRadians(radius));
|
||||
S1Angle query_radius_radians = S1Angle::Radians(S2Earth::MetersToRadians(query_radius_meters));
|
||||
double query_lat = std::stod(filter_value_parts[0]);
|
||||
double query_lng = std::stod(filter_value_parts[1]);
|
||||
S2Point center = S2LatLng::FromDegrees(query_lat, query_lng).ToPoint();
|
||||
query_region = new S2Cap(center, query_radius);
|
||||
query_region = new S2Cap(center, query_radius_radians);
|
||||
}
|
||||
std::unique_ptr<S2Region> query_region_guard(query_region);
|
||||
|
||||
@ -825,8 +827,8 @@ void filter_result_iterator_t::init() {
|
||||
geo_result_ids.erase(std::unique( geo_result_ids.begin(), geo_result_ids.end() ), geo_result_ids.end());
|
||||
|
||||
// Skip exact filtering step if query radius is greater than the threshold.
|
||||
if (!is_polygon && fi < a_filter.params.size() &&
|
||||
radius > a_filter.params[fi][filter::EXACT_GEO_FILTER_RADIUS_KEY].get<double>()) {
|
||||
if (fi < a_filter.params.size() &&
|
||||
query_radius_meters > a_filter.params[fi][filter::EXACT_GEO_FILTER_RADIUS_KEY].get<double>()) {
|
||||
uint32_t* out = nullptr;
|
||||
filter_result.count = ArrayUtils::or_scalar(geo_result_ids.data(), geo_result_ids.size(),
|
||||
filter_result.docs, filter_result.count, &out);
|
||||
|
@ -85,7 +85,7 @@ TEST_F(GeoFilteringTest, GeoPointFiltering) {
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
|
||||
results = coll1->search("*", {}, "loc: [([48.90615, 2.34358], radius: 1 km), ([48.8462, 2.34515], radius: 1 km)]",
|
||||
{}, {}, {0}, 10, 1, FREQUENCY).get();
|
||||
@ -115,9 +115,9 @@ TEST_F(GeoFilteringTest, GeoPointFiltering) {
|
||||
|
||||
ASSERT_EQ(3, results["found"].get<size_t>());
|
||||
|
||||
ASSERT_STREQ("6", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("5", results["hits"][1]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("3", results["hits"][2]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_EQ("6", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("5", results["hits"][1]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("3", results["hits"][2]["document"]["id"].get<std::string>());
|
||||
|
||||
// when geo query had NaN
|
||||
auto gop = coll1->search("*", {}, "loc: ([NaN, nan], radius: 1 mi)",
|
||||
@ -206,8 +206,8 @@ TEST_F(GeoFilteringTest, GeoPointArrayFiltering) {
|
||||
ASSERT_EQ(2, results["found"].get<size_t>());
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
|
||||
|
||||
// Default value of exact_filter_radius is 10km, exact filtering is not performed.
|
||||
results = coll1->search("*",
|
||||
@ -217,9 +217,9 @@ TEST_F(GeoFilteringTest, GeoPointArrayFiltering) {
|
||||
ASSERT_EQ(3, results["found"].get<size_t>());
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
|
||||
ASSERT_STREQ("2", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("0", results["hits"][2]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_EQ("2", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("0", results["hits"][2]["document"]["id"].get<std::string>());
|
||||
|
||||
// pick location close to none of the spots
|
||||
results = coll1->search("*",
|
||||
@ -244,7 +244,7 @@ TEST_F(GeoFilteringTest, GeoPointArrayFiltering) {
|
||||
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
|
||||
ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
@ -355,9 +355,9 @@ TEST_F(GeoFilteringTest, GeoPolygonFiltering) {
|
||||
ASSERT_EQ(3, results["found"].get<size_t>());
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
|
||||
ASSERT_STREQ("8", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("4", results["hits"][1]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("0", results["hits"][2]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_EQ("8", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("4", results["hits"][1]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("0", results["hits"][2]["document"]["id"].get<std::string>());
|
||||
|
||||
// should work even if points of polygon are clockwise
|
||||
|
||||
@ -389,6 +389,8 @@ TEST_F(GeoFilteringTest, GeoPolygonFilteringSouthAmerica) {
|
||||
std::vector<std::vector<std::string>> records = {
|
||||
{"North of Equator", "4.48615, -71.38049"},
|
||||
{"South of Equator", "-8.48587, -71.02892"},
|
||||
{"North of Equator, outside polygon", "4.13377, -56.00459"},
|
||||
{"South of Equator, outside polygon", "-4.5041, -57.34523"},
|
||||
};
|
||||
|
||||
for(size_t i=0; i<records.size(); i++) {
|
||||
@ -408,8 +410,7 @@ TEST_F(GeoFilteringTest, GeoPolygonFilteringSouthAmerica) {
|
||||
ASSERT_TRUE(coll1->add(doc.dump()).ok());
|
||||
}
|
||||
|
||||
// pick a polygon that covers both points
|
||||
|
||||
// polygon only covers 2 points but all points are returned since exact filtering is not performed.
|
||||
auto results = coll1->search("*",
|
||||
{}, "loc: ([13.3163, -82.3585, "
|
||||
"-29.134, -82.3585, "
|
||||
@ -417,9 +418,22 @@ TEST_F(GeoFilteringTest, GeoPolygonFilteringSouthAmerica) {
|
||||
"13.3163, -59.8528])",
|
||||
{}, {}, {0}, 10, 1, FREQUENCY).get();
|
||||
|
||||
ASSERT_EQ(4, results["found"].get<size_t>());
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
results = coll1->search("*",
|
||||
{}, "loc: ([13.3163, -82.3585, "
|
||||
"-29.134, -82.3585, "
|
||||
"-29.134, -59.8528, "
|
||||
"13.3163, -59.8528], exact_filter_radius: 2703km)",
|
||||
{}, {}, {0}, 10, 1, FREQUENCY).get();
|
||||
|
||||
ASSERT_EQ(2, results["found"].get<size_t>());
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
|
||||
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user