diff --git a/src/collection.cpp b/src/collection.cpp index e05cea2c..c19607c6 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -4181,28 +4181,33 @@ bool Collection::get_enable_nested_fields() { } Option Collection::parse_facet(const std::string& facet_field, std::vector& facets) const{ - const std::regex base_pattern("[a-z]+\\(.*\\)"); + const std::regex base_pattern(".+\\(.*\\)"); const std::regex range_pattern("[[a-zA-Z]+:\\[([0-9]+)\\, ([0-9]+)\\]"); if(facet_field.find(":") != std::string::npos) { //range based facet - if(!std::regex_match(facet_field, base_pattern)){ - std::string error = "Range string base pattern not matched."; + std::string error = "Facet range value is not valid."; return Option(400, error); } auto startpos = facet_field.find("("); auto field_name = facet_field.substr(0, startpos); + if(search_schema.count(field_name) == 0) { + std::string error = "Could not find a facet field named `" + field_name + "` in the schema."; + return Option(404, error); + } + const field& a_field = search_schema.at(field_name); + if(!a_field.is_int32() && !a_field.is_int64()){ - std::string error = "Range facet is restricted to Numeric fields only."; + std::string error = "Range facet is restricted to only int32 and int64 fields."; return Option(400, error); } facet a_facet(field_name); - //starting after "(" and excluding ")" + //starting after "(" and excluding ")" auto range_string = std::string(facet_field.begin() + startpos + 1, facet_field.end() - 1); //split the ranges @@ -4240,10 +4245,10 @@ Option Collection::parse_facet(const std::string& facet_field, std::vector } index++; - } + } if((result.empty()) || (range_open==true)){ - std::string error = "Error splitting the range string."; + std::string error = "Error splitting the facet range values."; return Option(400, error); } @@ -4253,16 +4258,16 @@ Option Collection::parse_facet(const std::string& facet_field, std::vector for(const auto& range : result){ //validate each range syntax if(!std::regex_match(range, range_pattern)){ - std::string error = "Range String range pattern not matched."; + std::string error = "Facet range value is not valid."; return Option(400, error); } - + auto pos1 = range.find(":"); std::string range_val = range.substr(0, pos1); - + auto pos2 = range.find(","); auto pos3 = range.find("]"); - + int64_t lower_range = std::stoll(range.substr(pos1 + 2, pos2)); int64_t upper_range = std::stoll(range.substr(pos2 + 1, pos3)); @@ -4282,10 +4287,10 @@ Option Collection::parse_facet(const std::string& facet_field, std::vector std::string error = "Ranges in range facet syntax should be continous."; return Option(400, error); } - + range_map[upper_range] = range_val; } - + a_facet.is_range_query = true; facets.emplace_back(std::move(a_facet)); @@ -4294,22 +4299,30 @@ Option Collection::parse_facet(const std::string& facet_field, std::vector auto prefix = facet_field.substr(0, facet_field.size() - 1); auto pair = search_schema.equal_prefix_range(prefix); + if(pair.first == pair.second) { + // not found + std::string error = "Could not find a facet field for `" + facet_field + "` in the schema."; + return Option(404, error); + } + // Collect the fields that match the prefix and are marked as facet. for (auto field = pair.first; field != pair.second; field++) { if (field->facet) { facets.emplace_back(facet(field->name)); + } else { + std::string error = "Field `" + field->name + "` is not marked as a facet in the schema."; + return Option(404, error); } } - } else {//normal facet - facets.emplace_back(facet(facet_field)); + } else { + // normal facet + if(search_schema.count(facet_field) == 0 || !search_schema.at(facet_field).facet) { + std::string error = "Could not find a facet field named `" + facet_field + "` in the schema."; + return Option(404, error); + } + facets.emplace_back(facet(facet_field)); } - if(search_schema.count(facets.back().field_name) == 0 || !search_schema.at(facets.back().field_name).facet) { - std::string error = "Could not find a facet field named `" + facets.back().field_name + "` in the schema."; - facets.pop_back(); - return Option(404, error); - } - return Option(true); } diff --git a/test/collection_faceting_test.cpp b/test/collection_faceting_test.cpp index 8ce0c639..09b31387 100644 --- a/test/collection_faceting_test.cpp +++ b/test/collection_faceting_test.cpp @@ -211,6 +211,13 @@ TEST_F(CollectionFacetingTest, FacetCounts) { ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][0]["count"]); ASSERT_STREQ("FINE PLATINUM", results["facet_counts"][0]["counts"][0]["value"].get().c_str()); + // facet with wildcard + results = coll_array_fields->search("Jeremy", query_fields, "", {"ag*"}, sort_fields, {0}, 10, 1, FREQUENCY, + {false}).get(); + ASSERT_EQ(5, results["hits"].size()); + ASSERT_EQ(1, results["facet_counts"].size()); + ASSERT_STREQ("age", results["facet_counts"][0]["field_name"].get().c_str()); + // facet query on an integer field results = coll_array_fields->search("*", query_fields, "", {"age"}, sort_fields, {0}, 10, 1, FREQUENCY, {false}, Index::DROP_TOKENS_THRESHOLD, @@ -330,6 +337,24 @@ TEST_F(CollectionFacetingTest, FacetCounts) { ASSERT_FALSE(res_op.ok()); ASSERT_STREQ("Could not find a facet field named `foobar` in the schema.", res_op.error().c_str()); + // unknown wildcard facet field + res_op = coll_array_fields->search("*", query_fields, "", {"foo*"}, sort_fields, {0}, 10, 1, FREQUENCY, + {false}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10); + + ASSERT_FALSE(res_op.ok()); + ASSERT_STREQ("Could not find a facet field for `foo*` in the schema.", res_op.error().c_str()); + + // known filed but not a facet + res_op = coll_array_fields->search("*", query_fields, "", {"nam*"}, sort_fields, {0}, 10, 1, FREQUENCY, + {false}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10); + + ASSERT_FALSE(res_op.ok()); + ASSERT_STREQ("Field `name` is not marked as a facet in the schema.", res_op.error().c_str()); + // when facet query is given but no facet fields are specified, must return an error message res_op = coll_array_fields->search("*", query_fields, "", {}, sort_fields, {0}, 10, 1, FREQUENCY, {false}, Index::DROP_TOKENS_THRESHOLD, @@ -1118,7 +1143,8 @@ TEST_F(CollectionFacetingTest, FacetParseTest){ TEST_F(CollectionFacetingTest, RangeFacetTest) { std::vector fields = {field("place", field_types::STRING, false), field("state", field_types::STRING, false), - field("visitors", field_types::INT32, true),}; + field("visitors", field_types::INT32, true), + field("trackingFrom", field_types::INT32, true),}; Collection* coll1 = collectionManager.create_collection( "coll1", 1, fields, "", 0, "", {}, {} ).get(); @@ -1127,31 +1153,35 @@ TEST_F(CollectionFacetingTest, RangeFacetTest) { doc1["place"] = "Mysore Palace"; doc1["state"] = "Karnataka"; doc1["visitors"] = 235486; + doc1["trackingFrom"] = 1900; nlohmann::json doc2; doc2["id"] = "1"; doc2["place"] = "Hampi"; doc2["state"] = "Karnataka"; doc2["visitors"] = 187654; + doc2["trackingFrom"] = 1900; nlohmann::json doc3; doc3["id"] = "2"; doc3["place"] = "Mahabalipuram"; doc3["state"] = "TamilNadu"; doc3["visitors"] = 174684; + doc3["trackingFrom"] = 1900; nlohmann::json doc4; doc4["id"] = "3"; doc4["place"] = "Meenakshi Amman Temple"; doc4["state"] = "TamilNadu"; doc4["visitors"] = 246676; + doc4["trackingFrom"] = 2000; nlohmann::json doc5; doc5["id"] = "4"; doc5["place"] = "Staue of Unity"; doc5["state"] = "Gujarat"; doc5["visitors"] = 345878; - + doc5["trackingFrom"] = 2000; ASSERT_TRUE(coll1->add(doc1.dump()).ok()); ASSERT_TRUE(coll1->add(doc2.dump()).ok()); @@ -1169,7 +1199,9 @@ TEST_F(CollectionFacetingTest, RangeFacetTest) { true, false, true, "", true).get(); ASSERT_EQ(2, results["facet_counts"][0]["counts"].size()); ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][0]["count"]); - ASSERT_STREQ("Busy", results["facet_counts"][0]["counts"][0]["value"].get().c_str()); + ASSERT_EQ("Busy", results["facet_counts"][0]["counts"][0]["value"].get()); + ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][1]["count"]); + ASSERT_EQ("VeryBusy", results["facet_counts"][0]["counts"][1]["value"].get()); auto results2 = coll1->search("Gujarat", {"state"}, "", {"visitors(Busy:[0, 200000], VeryBusy:[200000, 500000])"}, @@ -1184,6 +1216,61 @@ TEST_F(CollectionFacetingTest, RangeFacetTest) { ASSERT_STREQ("VeryBusy", results2["facet_counts"][0]["counts"][0]["value"].get().c_str()); ASSERT_TRUE(results2["facet_counts"][0]["counts"][1]["value"] == nullptr); + // ensure that unknown facet field are handled + + auto results3 = coll1->search("Gujarat", {"state"}, + "", {"visitorsz(Busy:[0, 200000], VeryBusy:[200000, 500000])"}, + {}, {2}, 10, + 1, FREQUENCY, {true}, + 10, spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 4, "", 10, {}, {}, {}, 0, + "", "", {}, 1000, + true, false, true, "", true); + ASSERT_FALSE(results3.ok()); + ASSERT_EQ("Could not find a facet field named `visitorsz` in the schema.", results3.error()); + + auto results4 = coll1->search("*", {"state"}, + "", {"trackingFrom(Old:[0, 1910], New:[1910, 2100])"}, + {}, {2}, 10, + 1, FREQUENCY, {true}, + 10, spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 4, "", 10, {}, {}, {}, 0, + "", "", {}, 1000, + true, false, true, "", true).get(); + + ASSERT_EQ(2, results4["facet_counts"][0]["counts"].size()); + ASSERT_EQ(3, results4["facet_counts"][0]["counts"][0]["count"].get()); + ASSERT_EQ("Old", results4["facet_counts"][0]["counts"][0]["value"].get()); + + ASSERT_EQ(2, results4["facet_counts"][0]["counts"][1]["count"].get()); + ASSERT_EQ("New", results4["facet_counts"][0]["counts"][1]["value"].get()); + + // ensure that only integer fields are allowed + auto rop = coll1->search("Karnataka", {"state"}, + "", {"state(Busy:[0, 200000], VeryBusy:[200000, 500000])"}, + {}, {2}, 10, + 1, FREQUENCY, {true}, + 10, spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 4, "", 10, {}, {}, {}, 0, + "", "", {}, 1000, + true, false, true, "", true); + + ASSERT_FALSE(rop.ok()); + ASSERT_EQ("Range facet is restricted to only int32 and int64 fields.", rop.error()); + + // ensure that bad facet range values are handled + rop = coll1->search("Karnataka", {"state"}, + "", {"visitors(Busy:[alpha, 200000], VeryBusy:[200000, beta])"}, + {}, {2}, 10, + 1, FREQUENCY, {true}, + 10, spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 4, "", 10, {}, {}, {}, 0, + "", "", {}, 1000, + true, false, true, "", true); + + ASSERT_FALSE(rop.ok()); + ASSERT_EQ("Facet range value is not valid.", rop.error()); + collectionManager.drop_collection("coll1"); } @@ -1306,7 +1393,7 @@ TEST_F(CollectionFacetingTest, RangeFacetTypo) { spp::sparse_hash_set(), 10, "", 30, 4, "", 10, {}, {}, {}, 0, "", "", {}, 1000, true, false, true, "", true); - ASSERT_STREQ("Error splitting the range string.", results.error().c_str()); + ASSERT_STREQ("Error splitting the facet range values.", results.error().c_str()); auto results2 = coll1->search("TamilNadu", {"state"}, "", {"visitors(Busy:[0, 200000], VeryBusy:200000, 500000])"}, //missing '[' in second range @@ -1316,7 +1403,7 @@ TEST_F(CollectionFacetingTest, RangeFacetTypo) { spp::sparse_hash_set(), 10, "", 30, 4, "", 10, {}, {}, {}, 0, "", "", {}, 1000, true, false, true, "", true); - ASSERT_STREQ("Error splitting the range string.", results2.error().c_str()); + ASSERT_STREQ("Error splitting the facet range values.", results2.error().c_str()); auto results3 = coll1->search("TamilNadu", {"state"}, "", {"visitors(Busy:[0, 200000] VeryBusy:[200000, 500000])"}, //missing ',' between ranges @@ -1326,7 +1413,7 @@ TEST_F(CollectionFacetingTest, RangeFacetTypo) { spp::sparse_hash_set(), 10, "", 30, 4, "", 10, {}, {}, {}, 0, "", "", {}, 1000, true, false, true, "", true); - ASSERT_STREQ("Error splitting the range string.", results3.error().c_str()); + ASSERT_STREQ("Error splitting the facet range values.", results3.error().c_str()); auto results4 = coll1->search("TamilNadu", {"state"}, "", {"visitors(Busy:[0 200000], VeryBusy:[200000, 500000])"}, //missing ',' between first ranges values @@ -1336,7 +1423,7 @@ TEST_F(CollectionFacetingTest, RangeFacetTypo) { spp::sparse_hash_set(), 10, "", 30, 4, "", 10, {}, {}, {}, 0, "", "", {}, 1000, true, false, true, "", true); - ASSERT_STREQ("Range String range pattern not matched.", results4.error().c_str()); + ASSERT_STREQ("Facet range value is not valid.", results4.error().c_str()); auto results5 = coll1->search("TamilNadu", {"state"}, "", {"visitors(Busy:[0, 200000 VeryBusy:200000, 500000])"}, //missing '],' and '[' @@ -1346,7 +1433,7 @@ TEST_F(CollectionFacetingTest, RangeFacetTypo) { spp::sparse_hash_set(), 10, "", 30, 4, "", 10, {}, {}, {}, 0, "", "", {}, 1000, true, false, true, "", true); - ASSERT_STREQ("Range String range pattern not matched.", results5.error().c_str()); + ASSERT_STREQ("Facet range value is not valid.", results5.error().c_str()); collectionManager.drop_collection("coll1"); }