Add filter::parse_geopoint_filter_value.

This commit is contained in:
Harpreet Sangar 2023-05-19 18:52:20 +05:30
parent a7e57992d2
commit 7fb193402d
3 changed files with 202 additions and 1 deletions

View File

@ -4,6 +4,7 @@
#include <map>
#include <tsl/htrie_map.h>
#include <art.h>
#include <json.hpp>
#include "store.h"
enum FILTER_OPERATOR {
@ -26,6 +27,15 @@ struct filter {
// Would store `Foo` in case of a filter expression like `$Foo(bar := baz)`
std::string referenced_collection_name = "";
std::vector<nlohmann::json> params;
/// For searching places within a given radius of a given latlong (mi for miles and km for kilometers)
static constexpr const char* GEO_FILTER_RADIUS = "radius";
/// Radius threshold beyond which exact filtering on geo_result_ids will not be done.
static constexpr const char* EXACT_GEO_FILTER_RADIUS = "exact_filter_radius";
static constexpr const char* DEFAULT_EXACT_GEO_FILTER_RADIUS = "10km";
/// Token that separates the two bounds of a range filter value.
static const std::string RANGE_OPERATOR() {
    const std::string range_token("..");
    return range_token;
}
@ -39,6 +49,10 @@ struct filter {
std::string& processed_filter_val,
NUM_COMPARATOR& num_comparator);
static Option<bool> parse_geopoint_filter_value(std::string& raw_value,
const std::string& format_err_msg,
filter& filter_exp);
static Option<bool> parse_filter_query(const std::string& filter_query,
const tsl::htrie_map<char, field>& search_schema,
const Store* store,

View File

@ -143,6 +143,142 @@ Option<bool> filter::parse_geopoint_filter_value(std::string& raw_value,
return Option<bool>(true);
}
/// Parses the bracketed geopoint filter syntax into `filter_exp`.
///
/// Accepted shapes (a single value, or several values wrapped in `[ ... ]`):
///   ([48.853, 2.344], radius: 1km, exact_filter_radius: 100km)
///   ([48.8662, 2.3255, 48.8581, 2.3209, 48.8561, 2.3448, 48.8641, 2.3469])
///   [ ([48.853, 2.344], radius: 1km), ([48.8662, 2.3255, 48.8581, 2.3209, 48.8561, 2.3448]) ]
///
/// For every radius value, "<points>, <distance>, <unit>" is appended to `filter_exp.values`
/// and an `exact_filter_radius` entry (the supplied one, or the default when it is omitted)
/// is appended to `filter_exp.params`. Polygon values have no parameters and are appended to
/// `filter_exp.values` after all radius values.
///
/// @param raw_value       Raw filter value text.
/// @param format_err_msg  Message returned with a 400 when the value is malformed.
/// @param filter_exp      Filter that receives the comparator, values and params.
/// @return Option<bool>(true) on success, otherwise a 400 error.
Option<bool> filter::parse_geopoint_filter_value(std::string& raw_value, const std::string& format_err_msg, filter& filter_exp) {
    // Every open parenthesis starts one geo filter value.
    const auto open_parenthesis_count = std::count(raw_value.begin(), raw_value.end(), '(');
    if (open_parenthesis_count < 1) {
        return Option<bool>(400, format_err_msg);
    }

    filter_exp.comparators.push_back(LESS_THAN_EQUALS);

    const bool is_multivalued = raw_value[0] == '[';
    // Skip the enclosing `[` of a multi-valued filter.
    size_t i = is_multivalued ? 1 : 0;

    // Polygons are added last since they don't have any parameters associated with them.
    std::vector<std::string> polygons;

    for (auto remaining = open_parenthesis_count; remaining > 0; remaining--) {
        if (is_multivalued) {
            const auto pos = raw_value.find('(', i);
            if (pos == std::string::npos) {
                return Option<bool>(400, format_err_msg);
            }
            i = pos;
        }

        // Step past the opening parenthesis of the current value.
        i++;
        if (i >= raw_value.size()) {
            return Option<bool>(400, format_err_msg);
        }

        const auto value_end_index = raw_value.find(')', i);
        if (value_end_index == std::string::npos) {
            return Option<bool>(400, format_err_msg);
        }

        // [48.853, 2.344], radius: 1km, exact_filter_radius: 100km
        // [48.8662, 2.3255, 48.8581, 2.3209, 48.8561, 2.3448, 48.8641, 2.3469]
        std::string value_str = raw_value.substr(i, value_end_index - i);
        StringUtils::trim(value_str);

        if (value_str.empty() || value_str[0] != '[') {
            return Option<bool>(400, format_err_msg);
        }

        const auto closing_bracket = value_str.find(']', 1);
        if (closing_bracket == std::string::npos) {
            return Option<bool>(400, format_err_msg);
        }

        const auto points_str = value_str.substr(1, closing_bracket - 1);
        std::vector<std::string> geo_points;
        StringUtils::split(points_str, geo_points, ",");

        for (const auto& geo_point: geo_points) {
            if (!StringUtils::is_float(geo_point)) {
                return Option<bool>(400, format_err_msg);
            }
        }

        // A value that ends right after the point list has no options, i.e. it is a polygon.
        const bool is_polygon = value_str.back() == ']';
        if (is_polygon) {
            polygons.push_back(points_str);
            continue;
        }

        // Handle options.
        // , radius: 1km, exact_filter_radius: 100km
        i = raw_value.find(']', i);
        i++;

        std::vector<std::string> options;
        StringUtils::split(raw_value.substr(i, value_end_index - i), options, ",");

        if (options.empty()) {
            // Missing radius option
            return Option<bool>(400, format_err_msg);
        }

        bool is_radius_present = false;
        // Tracked per value: checking `filter_exp.params.empty()` instead would skip the default
        // for every value after the first one that produced a param.
        bool is_exact_filter_radius_present = false;

        for (auto const& option: options) {
            if (option.empty()) {
                continue;
            }

            std::vector<std::string> key_value;
            StringUtils::split(option, key_value, ":");

            if (key_value.size() < 2) {
                continue;
            }

            if (key_value[0] == GEO_FILTER_RADIUS) {
                is_radius_present = true;

                auto& value = key_value[1];
                if (value.size() < 2) {
                    return Option<bool>(400, "Unit must be either `km` or `mi`.");
                }

                // The unit is the two trailing characters of the radius value.
                const std::string unit = value.substr(value.size() - 2, 2);
                if (unit != "km" && unit != "mi") {
                    return Option<bool>(400, "Unit must be either `km` or `mi`.");
                }

                // Whatever precedes the unit must be a single float distance.
                std::vector<std::string> dist_values;
                StringUtils::split(value, dist_values, unit);

                if (dist_values.size() != 1) {
                    return Option<bool>(400, format_err_msg);
                }

                if (!StringUtils::is_float(dist_values[0])) {
                    return Option<bool>(400, format_err_msg);
                }

                filter_exp.values.push_back(points_str + ", " + dist_values[0] + ", " + unit);
            } else if (key_value[0] == EXACT_GEO_FILTER_RADIUS) {
                is_exact_filter_radius_present = true;

                nlohmann::json param;
                param[EXACT_GEO_FILTER_RADIUS] = key_value[1];
                filter_exp.params.push_back(param);
            }
        }

        if (!is_radius_present) {
            return Option<bool>(400, format_err_msg);
        }

        // `exact_filter_radius` was not given for this value; fall back to the default.
        if (!is_exact_filter_radius_present) {
            nlohmann::json param;
            param[EXACT_GEO_FILTER_RADIUS] = DEFAULT_EXACT_GEO_FILTER_RADIUS;
            filter_exp.params.push_back(param);
        }
    }

    for (auto const& polygon: polygons) {
        filter_exp.values.push_back(polygon);
    }

    return Option<bool>(true);
}
/// Returns true when `expression` is exactly one of the logical operators `&&` or `||`.
bool isOperator(const std::string& expression) {
    if (expression.size() != 2) {
        return false;
    }

    const char first = expression[0];
    const bool is_operator_char = (first == '&' || first == '|');
    return is_operator_char && expression[1] == first;
}
@ -383,10 +519,23 @@ Option<bool> toFilter(const std::string expression,
}
} else if (_field.is_geopoint()) {
filter_exp = {field_name, {}, {}};
NUM_COMPARATOR num_comparator;
if ((raw_value[0] == '(' && std::count(raw_value.begin(), raw_value.end(), '[') > 0) ||
std::count(raw_value.begin(), raw_value.end(), '[') > 1 ||
std::count(raw_value.begin(), raw_value.end(), ':') > 0) {
const std::string& format_err_msg = "Value of filter field `" + _field.name + "`: must be in the "
"`([-44.50, 170.29], radius: 0.75 km, exact_filter_radius: 5 km)` or "
"([56.33, -65.97, 23.82, -127.82]) format.";
auto parse_op = filter::parse_geopoint_filter_value(raw_value, format_err_msg, filter_exp);
return parse_op;
}
const std::string& format_err_msg = "Value of filter field `" + _field.name +
"`: must be in the `(-44.50, 170.29, 0.75 km)` or "
"(56.33, -65.97, 23.82, -127.82) format.";
NUM_COMPARATOR num_comparator;
// could be a single value or a list
if (raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') {
std::vector<std::string> filter_values;

View File

@ -349,6 +349,35 @@ size_t StringUtils::get_num_chars(const std::string& s) {
return j;
}
/// Extracts a multi-valued geopoint filter value of the form
///   [ ([points], options), ([points]) ]
/// starting at `filter_query[index]`, which must be the opening `[`.
///
/// On success `tokens` holds the text from the opening bracket through its matching closing
/// bracket, and `index` is left one position past that closing bracket. A 400 error is returned
/// when the value does not start with `[` or when the brackets are unbalanced.
Option<bool> parse_multi_valued_geopoint_filter(const std::string& filter_query, std::string& tokens, size_t& index) {
    auto parse_error = Option<bool>(400, "Could not parse the geopoint filter.");

    if (filter_query[index] != '[') {
        return parse_error;
    }

    const size_t value_begin = index;
    const auto query_length = filter_query.size();

    // Individual geopoint filters contain their own square brackets, so track nesting depth
    // until the outermost bracket is closed.
    int depth = 1;
    for (++index; index < query_length && depth > 0; ++index) {
        const char current = filter_query[index];
        if (current == '[') {
            ++depth;
        } else if (current == ']') {
            --depth;
        }
    }

    if (depth != 0) {
        return parse_error;
    }

    tokens = filter_query.substr(value_begin, index - value_begin);
    return Option<bool>(true);
}
Option<bool> parse_reference_filter(const std::string& filter_query, std::queue<std::string>& tokens, size_t& index) {
auto error = Option<bool>(400, "Could not parse the reference filter.");
if (filter_query[index] != '$') {
@ -440,6 +469,15 @@ Option<bool> StringUtils::tokenize_filter_query(const std::string& filter_query,
if (preceding_colon && c == '(') {
is_geo_value = true;
preceding_colon = false;
} else if (preceding_colon && c == '[') {
std::string value;
auto op = parse_multi_valued_geopoint_filter(filter_query, value, i);
if (!op.ok()) {
return op;
}
ss << value;
break;
} else if (preceding_colon && c != ' ') {
preceding_colon = false;
}