Improve range facet value validation + error messages.

This commit is contained in:
Kishore Nallan 2023-02-04 12:24:12 +05:30
parent f9bd328e48
commit 7df8ea639e
2 changed files with 129 additions and 29 deletions

View File

@ -4181,28 +4181,33 @@ bool Collection::get_enable_nested_fields() {
}
Option<bool> Collection::parse_facet(const std::string& facet_field, std::vector<facet>& facets) const{
const std::regex base_pattern("[a-z]+\\(.*\\)");
const std::regex base_pattern(".+\\(.*\\)");
const std::regex range_pattern("[[a-zA-Z]+:\\[([0-9]+)\\, ([0-9]+)\\]");
if(facet_field.find(":") != std::string::npos) { //range based facet
if(!std::regex_match(facet_field, base_pattern)){
std::string error = "Range string base pattern not matched.";
std::string error = "Facet range value is not valid.";
return Option<bool>(400, error);
}
auto startpos = facet_field.find("(");
auto field_name = facet_field.substr(0, startpos);
if(search_schema.count(field_name) == 0) {
std::string error = "Could not find a facet field named `" + field_name + "` in the schema.";
return Option<bool>(404, error);
}
const field& a_field = search_schema.at(field_name);
if(!a_field.is_int32() && !a_field.is_int64()){
std::string error = "Range facet is restricted to Numeric fields only.";
std::string error = "Range facet is restricted to only int32 and int64 fields.";
return Option<bool>(400, error);
}
facet a_facet(field_name);
//starting after "(" and excluding ")"
//starting after "(" and excluding ")"
auto range_string = std::string(facet_field.begin() + startpos + 1, facet_field.end() - 1);
//split the ranges
@ -4240,10 +4245,10 @@ Option<bool> Collection::parse_facet(const std::string& facet_field, std::vector
}
index++;
}
}
if((result.empty()) || (range_open==true)){
std::string error = "Error splitting the range string.";
std::string error = "Error splitting the facet range values.";
return Option<bool>(400, error);
}
@ -4253,16 +4258,16 @@ Option<bool> Collection::parse_facet(const std::string& facet_field, std::vector
for(const auto& range : result){
//validate each range syntax
if(!std::regex_match(range, range_pattern)){
std::string error = "Range String range pattern not matched.";
std::string error = "Facet range value is not valid.";
return Option<bool>(400, error);
}
auto pos1 = range.find(":");
std::string range_val = range.substr(0, pos1);
auto pos2 = range.find(",");
auto pos3 = range.find("]");
int64_t lower_range = std::stoll(range.substr(pos1 + 2, pos2));
int64_t upper_range = std::stoll(range.substr(pos2 + 1, pos3));
@ -4282,10 +4287,10 @@ Option<bool> Collection::parse_facet(const std::string& facet_field, std::vector
std::string error = "Ranges in range facet syntax should be continous.";
return Option<bool>(400, error);
}
range_map[upper_range] = range_val;
}
a_facet.is_range_query = true;
facets.emplace_back(std::move(a_facet));
@ -4294,22 +4299,30 @@ Option<bool> Collection::parse_facet(const std::string& facet_field, std::vector
auto prefix = facet_field.substr(0, facet_field.size() - 1);
auto pair = search_schema.equal_prefix_range(prefix);
if(pair.first == pair.second) {
// not found
std::string error = "Could not find a facet field for `" + facet_field + "` in the schema.";
return Option<bool>(404, error);
}
// Collect the fields that match the prefix and are marked as facet.
for (auto field = pair.first; field != pair.second; field++) {
if (field->facet) {
facets.emplace_back(facet(field->name));
} else {
std::string error = "Field `" + field->name + "` is not marked as a facet in the schema.";
return Option<bool>(404, error);
}
}
} else {//normal facet
facets.emplace_back(facet(facet_field));
} else {
// normal facet
if(search_schema.count(facet_field) == 0 || !search_schema.at(facet_field).facet) {
std::string error = "Could not find a facet field named `" + facet_field + "` in the schema.";
return Option<bool>(404, error);
}
facets.emplace_back(facet(facet_field));
}
if(search_schema.count(facets.back().field_name) == 0 || !search_schema.at(facets.back().field_name).facet) {
std::string error = "Could not find a facet field named `" + facets.back().field_name + "` in the schema.";
facets.pop_back();
return Option<bool>(404, error);
}
return Option<bool>(true);
}

View File

@ -211,6 +211,13 @@ TEST_F(CollectionFacetingTest, FacetCounts) {
ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][0]["count"]);
ASSERT_STREQ("FINE PLATINUM", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
// facet with wildcard
results = coll_array_fields->search("Jeremy", query_fields, "", {"ag*"}, sort_fields, {0}, 10, 1, FREQUENCY,
{false}).get();
ASSERT_EQ(5, results["hits"].size());
ASSERT_EQ(1, results["facet_counts"].size());
ASSERT_STREQ("age", results["facet_counts"][0]["field_name"].get<std::string>().c_str());
// facet query on an integer field
results = coll_array_fields->search("*", query_fields, "", {"age"}, sort_fields, {0}, 10, 1, FREQUENCY,
{false}, Index::DROP_TOKENS_THRESHOLD,
@ -330,6 +337,24 @@ TEST_F(CollectionFacetingTest, FacetCounts) {
ASSERT_FALSE(res_op.ok());
ASSERT_STREQ("Could not find a facet field named `foobar` in the schema.", res_op.error().c_str());
// unknown wildcard facet field
res_op = coll_array_fields->search("*", query_fields, "", {"foo*"}, sort_fields, {0}, 10, 1, FREQUENCY,
{false}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10);
ASSERT_FALSE(res_op.ok());
ASSERT_STREQ("Could not find a facet field for `foo*` in the schema.", res_op.error().c_str());
// known filed but not a facet
res_op = coll_array_fields->search("*", query_fields, "", {"nam*"}, sort_fields, {0}, 10, 1, FREQUENCY,
{false}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10);
ASSERT_FALSE(res_op.ok());
ASSERT_STREQ("Field `name` is not marked as a facet in the schema.", res_op.error().c_str());
// when facet query is given but no facet fields are specified, must return an error message
res_op = coll_array_fields->search("*", query_fields, "", {}, sort_fields, {0}, 10, 1, FREQUENCY,
{false}, Index::DROP_TOKENS_THRESHOLD,
@ -1118,7 +1143,8 @@ TEST_F(CollectionFacetingTest, FacetParseTest){
TEST_F(CollectionFacetingTest, RangeFacetTest) {
std::vector<field> fields = {field("place", field_types::STRING, false),
field("state", field_types::STRING, false),
field("visitors", field_types::INT32, true),};
field("visitors", field_types::INT32, true),
field("trackingFrom", field_types::INT32, true),};
Collection* coll1 = collectionManager.create_collection(
"coll1", 1, fields, "", 0, "", {}, {}
).get();
@ -1127,31 +1153,35 @@ TEST_F(CollectionFacetingTest, RangeFacetTest) {
doc1["place"] = "Mysore Palace";
doc1["state"] = "Karnataka";
doc1["visitors"] = 235486;
doc1["trackingFrom"] = 1900;
nlohmann::json doc2;
doc2["id"] = "1";
doc2["place"] = "Hampi";
doc2["state"] = "Karnataka";
doc2["visitors"] = 187654;
doc2["trackingFrom"] = 1900;
nlohmann::json doc3;
doc3["id"] = "2";
doc3["place"] = "Mahabalipuram";
doc3["state"] = "TamilNadu";
doc3["visitors"] = 174684;
doc3["trackingFrom"] = 1900;
nlohmann::json doc4;
doc4["id"] = "3";
doc4["place"] = "Meenakshi Amman Temple";
doc4["state"] = "TamilNadu";
doc4["visitors"] = 246676;
doc4["trackingFrom"] = 2000;
nlohmann::json doc5;
doc5["id"] = "4";
doc5["place"] = "Staue of Unity";
doc5["state"] = "Gujarat";
doc5["visitors"] = 345878;
doc5["trackingFrom"] = 2000;
ASSERT_TRUE(coll1->add(doc1.dump()).ok());
ASSERT_TRUE(coll1->add(doc2.dump()).ok());
@ -1169,7 +1199,9 @@ TEST_F(CollectionFacetingTest, RangeFacetTest) {
true, false, true, "", true).get();
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][0]["count"]);
ASSERT_STREQ("Busy", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
ASSERT_EQ("Busy", results["facet_counts"][0]["counts"][0]["value"].get<std::string>());
ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][1]["count"]);
ASSERT_EQ("VeryBusy", results["facet_counts"][0]["counts"][1]["value"].get<std::string>());
auto results2 = coll1->search("Gujarat", {"state"},
"", {"visitors(Busy:[0, 200000], VeryBusy:[200000, 500000])"},
@ -1184,6 +1216,61 @@ TEST_F(CollectionFacetingTest, RangeFacetTest) {
ASSERT_STREQ("VeryBusy", results2["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
ASSERT_TRUE(results2["facet_counts"][0]["counts"][1]["value"] == nullptr);
// ensure that unknown facet field are handled
auto results3 = coll1->search("Gujarat", {"state"},
"", {"visitorsz(Busy:[0, 200000], VeryBusy:[200000, 500000])"},
{}, {2}, 10,
1, FREQUENCY, {true},
10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000,
true, false, true, "", true);
ASSERT_FALSE(results3.ok());
ASSERT_EQ("Could not find a facet field named `visitorsz` in the schema.", results3.error());
auto results4 = coll1->search("*", {"state"},
"", {"trackingFrom(Old:[0, 1910], New:[1910, 2100])"},
{}, {2}, 10,
1, FREQUENCY, {true},
10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000,
true, false, true, "", true).get();
ASSERT_EQ(2, results4["facet_counts"][0]["counts"].size());
ASSERT_EQ(3, results4["facet_counts"][0]["counts"][0]["count"].get<std::size_t>());
ASSERT_EQ("Old", results4["facet_counts"][0]["counts"][0]["value"].get<std::string>());
ASSERT_EQ(2, results4["facet_counts"][0]["counts"][1]["count"].get<std::size_t>());
ASSERT_EQ("New", results4["facet_counts"][0]["counts"][1]["value"].get<std::string>());
// ensure that only integer fields are allowed
auto rop = coll1->search("Karnataka", {"state"},
"", {"state(Busy:[0, 200000], VeryBusy:[200000, 500000])"},
{}, {2}, 10,
1, FREQUENCY, {true},
10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000,
true, false, true, "", true);
ASSERT_FALSE(rop.ok());
ASSERT_EQ("Range facet is restricted to only int32 and int64 fields.", rop.error());
// ensure that bad facet range values are handled
rop = coll1->search("Karnataka", {"state"},
"", {"visitors(Busy:[alpha, 200000], VeryBusy:[200000, beta])"},
{}, {2}, 10,
1, FREQUENCY, {true},
10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000,
true, false, true, "", true);
ASSERT_FALSE(rop.ok());
ASSERT_EQ("Facet range value is not valid.", rop.error());
collectionManager.drop_collection("coll1");
}
@ -1306,7 +1393,7 @@ TEST_F(CollectionFacetingTest, RangeFacetTypo) {
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000,
true, false, true, "", true);
ASSERT_STREQ("Error splitting the range string.", results.error().c_str());
ASSERT_STREQ("Error splitting the facet range values.", results.error().c_str());
auto results2 = coll1->search("TamilNadu", {"state"},
"", {"visitors(Busy:[0, 200000], VeryBusy:200000, 500000])"}, //missing '[' in second range
@ -1316,7 +1403,7 @@ TEST_F(CollectionFacetingTest, RangeFacetTypo) {
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000,
true, false, true, "", true);
ASSERT_STREQ("Error splitting the range string.", results2.error().c_str());
ASSERT_STREQ("Error splitting the facet range values.", results2.error().c_str());
auto results3 = coll1->search("TamilNadu", {"state"},
"", {"visitors(Busy:[0, 200000] VeryBusy:[200000, 500000])"}, //missing ',' between ranges
@ -1326,7 +1413,7 @@ TEST_F(CollectionFacetingTest, RangeFacetTypo) {
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000,
true, false, true, "", true);
ASSERT_STREQ("Error splitting the range string.", results3.error().c_str());
ASSERT_STREQ("Error splitting the facet range values.", results3.error().c_str());
auto results4 = coll1->search("TamilNadu", {"state"},
"", {"visitors(Busy:[0 200000], VeryBusy:[200000, 500000])"}, //missing ',' between first ranges values
@ -1336,7 +1423,7 @@ TEST_F(CollectionFacetingTest, RangeFacetTypo) {
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000,
true, false, true, "", true);
ASSERT_STREQ("Range String range pattern not matched.", results4.error().c_str());
ASSERT_STREQ("Facet range value is not valid.", results4.error().c_str());
auto results5 = coll1->search("TamilNadu", {"state"},
"", {"visitors(Busy:[0, 200000 VeryBusy:200000, 500000])"}, //missing '],' and '['
@ -1346,7 +1433,7 @@ TEST_F(CollectionFacetingTest, RangeFacetTypo) {
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000,
true, false, true, "", true);
ASSERT_STREQ("Range String range pattern not matched.", results5.error().c_str());
ASSERT_STREQ("Facet range value is not valid.", results5.error().c_str());
collectionManager.drop_collection("coll1");
}