diff --git a/include/string_utils.h b/include/string_utils.h index 9a132d40..c1d7f201 100644 --- a/include/string_utils.h +++ b/include/string_utils.h @@ -67,6 +67,8 @@ struct StringUtils { return ss.str(); } + /* Splits vals_str on commas and appends each piece to filter_values; commas that sit between a pair of backticks stay inside the value. Defined in src/string_utils.cpp. */ static void split_to_values(const std::string& vals_str, std::vector& filter_values); + // Adapted from: http://stackoverflow.com/a/36000453/131050 static std::string & trim(std::string & str) { // right trim diff --git a/src/collection.cpp b/src/collection.cpp index 723a3ad4..b3d8c3c1 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -1966,7 +1966,7 @@ Option Collection::parse_geopoint_filter_value(std::string& raw_value, } Option Collection::parse_filter_query(const std::string& simple_filter_query, - std::vector& filters) const { + std::vector& filters) const { std::vector exclude_filters; // to ensure that they go last in the list of filters @@ -2157,7 +2157,7 @@ Option Collection::parse_filter_query(const std::string& simple_filter_que if(raw_value[filter_value_index] == '[' && raw_value[raw_value.size() - 1] == ']') { std::vector filter_values; - StringUtils::split(raw_value.substr(filter_value_index+1, raw_value.size() - filter_value_index - 2), filter_values, ","); + StringUtils::split_to_values(raw_value.substr(filter_value_index+1, raw_value.size() - filter_value_index - 2), filter_values); f = {field_name, filter_values, {str_comparator}}; } else { f = {field_name, {raw_value.substr(filter_value_index)}, {str_comparator}}; diff --git a/src/string_utils.cpp b/src/string_utils.cpp index f053961e..717405f4 100644 --- a/src/string_utils.cpp +++ b/src/string_utils.cpp @@ -139,6 +139,47 @@ std::map StringUtils::parse_query_string(const std::st return query_map; } +/* Splits vals_str into values and appends them to filter_values. A comma outside backticks ends the current value (even an empty one); a comma between backticks is kept as part of the value. Unescaped backticks are dropped from the output, while a backtick preceded by a backslash is kept, and the backslash stays as well because the default case has already appended it. Values are not trimmed, and the final value is appended only when it is non-empty. */ void StringUtils::split_to_values(const std::string& vals_str, std::vector& filter_values) { + size_t i = 0; + + bool inside_tick = false; + std::string buffer; + buffer.reserve(20); + + while(i < vals_str.size()) { + char c = vals_str[i]; + /* only the immediately preceding character is checked for a backslash */ bool escaped_tick = (i != 0) && c == '`' 
&& vals_str[i-1] == '\\'; + + switch(c) { + case '`': + if(escaped_tick) { /* escaped backtick: keep it as a literal character */ + buffer += c; + } else if(inside_tick && !buffer.empty()) { /* NOTE(review): a closing backtick is only recognised once buffer is non-empty, so an empty quoted value (two adjacent backticks) leaves inside_tick set and later commas are kept in the buffer; confirm this is intended */ + inside_tick = false; + } else { + inside_tick = true; + } + break; + case ',': + if(!inside_tick) { /* separator: emit the current value as-is, without trimming */ + filter_values.push_back(buffer); + buffer = ""; + } else { + buffer += c; + } + break; + default: + buffer += c; + } + + i++; + } + + if(!buffer.empty()) { /* flush the last value; an empty trailing value is not added */ + filter_values.push_back(buffer); + } +} + /*size_t StringUtils::unicode_length(const std::string& bytes) { std::wstring_convert, char32_t> utf8conv; return utf8conv.from_bytes(bytes).size(); diff --git a/test/collection_filtering_test.cpp b/test/collection_filtering_test.cpp index cbb3efd1..ca360d79 100644 --- a/test/collection_filtering_test.cpp +++ b/test/collection_filtering_test.cpp @@ -1341,5 +1341,62 @@ TEST_F(CollectionFilteringTest, NegationOperatorBasics) { ASSERT_EQ(1, results["found"].get()); ASSERT_STREQ("3", results["hits"][0]["document"]["id"].get().c_str()); + collectionManager.drop_collection("coll1"); +} + +TEST_F(CollectionFilteringTest, FilterStringsWithComma) { + Collection *coll1; + + std::vector fields = {field("place", field_types::STRING, true), + field("state", field_types::STRING, false), + field("points", field_types::INT32, false),}; + + coll1 = collectionManager.get_collection("coll1").get(); + if(coll1 == nullptr) { + coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get(); + } + + std::vector> records = { + {"St. John's Cathedral, Denver, Colorado", "Colorado"}, + {"Crater Lake National Park, Oregon", "Oregon"}, + {"St. Patrick's Cathedral, Manhattan", "New York"}, + }; + + for(size_t i=0; iadd(doc.dump()).ok()); + } + + auto results = coll1->search("*", {"place"}, "place:= St. 
John's Cathedral, Denver, Colorado", {}, {}, 0, 10, 1, + FREQUENCY, true, 10).get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str()); + + results = coll1->search("*", {"place"}, "place:= [`St. John's Cathedral, Denver, Colorado`]", {}, {}, 0, 10, 1, + FREQUENCY, true, 10).get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str()); + + results = coll1->search("*", {"place"}, "place:= [`St. John's Cathedral, Denver, Colorado`, `St. Patrick's Cathedral, Manhattan`]", {}, {}, 0, 10, 1, + FREQUENCY, true, 10).get(); + + ASSERT_EQ(2, results["found"].get()); + ASSERT_STREQ("2", results["hits"][0]["document"]["id"].get().c_str()); + ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get().c_str()); + + results = coll1->search("*", {"place"}, "place: [`Cathedral, Denver, Colorado`]", {}, {}, 0, 10, 1, + FREQUENCY, true, 10).get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str()); + collectionManager.drop_collection("coll1"); } \ No newline at end of file diff --git a/test/string_utils_test.cpp b/test/string_utils_test.cpp index bd374470..e2d7b2a9 100644 --- a/test/string_utils_test.cpp +++ b/test/string_utils_test.cpp @@ -218,3 +218,41 @@ TEST(StringUtilsTest, ShouldParseQueryString) { qmap = StringUtils::parse_query_string(qs); ASSERT_EQ(0, qmap.size()); } + +TEST(StringUtilsTest, ShouldParseStringifiedList) { + std::string str = "John Galt, Random Jack"; + std::vector strs; + + StringUtils::split_to_values(str, strs); + ASSERT_EQ(2, strs.size()); + ASSERT_EQ("John Galt", strs[0]); + ASSERT_EQ(" Random Jack", strs[1]); + + strs.clear(); + str = "`John Galt`, `Random, Jack`"; + StringUtils::split_to_values(str, strs); + ASSERT_EQ(2, strs.size()); + ASSERT_EQ("John Galt", strs[0]); + ASSERT_EQ(" Random, Jack", strs[1]); + + strs.clear(); + str = "`John Galt, `Random, Jack`"; + 
StringUtils::split_to_values(str, strs); + ASSERT_EQ(2, strs.size()); + ASSERT_EQ("John Galt, Random", strs[0]); + ASSERT_EQ(" Jack", strs[1]); + + strs.clear(); + str = "`Traveller's \\`delight\\`!`, Not wrapped, Last word"; + StringUtils::split_to_values(str, strs); + ASSERT_EQ(3, strs.size()); + ASSERT_EQ("Traveller's \\`delight\\`!", strs[0]); + ASSERT_EQ(" Not wrapped", strs[1]); + ASSERT_EQ(" Last word", strs[2]); + + strs.clear(); + str = "`John Galt`"; + StringUtils::split_to_values(str, strs); + ASSERT_EQ(1, strs.size()); + ASSERT_EQ("John Galt", strs[0]); +} \ No newline at end of file