Reviewer notes (free text ahead of the first `diff --git` header; patch tools skip it):
- Line structure was rebuilt from a whitespace-collapsed copy of this patch. Indentation, blank
  context lines and template arguments that had been stripped (e.g. `Option<bool>`,
  `std::set<std::string>`) are reconstructed and should be checked against the tree before applying.
- src/collection_manager.cpp: `_get_reference_collection_names` now bounds the scan for ':' so a
  filter with no ':' (e.g. "foo") cannot index past the end of `filter_query`.
- test/collection_manager_test.cpp: the moved test is registered under the `CollectionManagerTest`
  fixture and additionally covers the missing-':' input.
- The `index` blob hashes of those two files are the original ones and do not reflect the new content.

diff --git a/include/collection_manager.h b/include/collection_manager.h
index 02b4dcbe..99de5a8c 100644
--- a/include/collection_manager.h
+++ b/include/collection_manager.h
@@ -201,4 +201,7 @@ public:
     Option<bool> upsert_preset(const std::string & preset_name, const nlohmann::json& preset_config);
 
     Option<bool> delete_preset(const std::string & preset_name);
+
+    static void _get_reference_collection_names(const std::string& filter_query,
+                                                std::set<std::string>& reference_collection_names);
 };
diff --git a/include/string_utils.h b/include/string_utils.h
index 7a3c9fd3..74cfc537 100644
--- a/include/string_utils.h
+++ b/include/string_utils.h
@@ -335,7 +335,4 @@ struct StringUtils {
     static Option<bool> tokenize_filter_query(const std::string& filter_query, std::queue<std::string>& tokens);
 
     static Option<bool> split_include_fields(const std::string& include_fields, std::vector<std::string>& tokens);
-
-    static void get_reference_collection_names(const std::string& filter_query,
-                                               std::set<std::string>& reference_collection_names);
 };
diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp
index 9944b62c..2828a378 100644
--- a/src/collection_manager.cpp
+++ b/src/collection_manager.cpp
@@ -676,13 +676,65 @@ Option<bool> add_unsigned_int_list_param(const std::string& param_name, const st
     return Option<bool>(true);
 }
 
+void CollectionManager::_get_reference_collection_names(const std::string& filter_query,
+                                                        std::set<std::string>& reference_collection_names) {
+    auto size = filter_query.size();
+    for (uint32_t i = 0; i < size;) {
+        auto c = filter_query[i];
+        if (c == ' ' || c == '(' || c == ')') {
+            i++;
+        } else if (c == '&' || c == '|') {
+            i += 2;
+        } else {
+            // Reference filter would start with $ symbol.
+            if (c == '$') {
+                auto open_paren_pos = filter_query.find('(', ++i);
+                if (open_paren_pos == std::string::npos) {
+                    return;
+                }
+
+                auto reference_collection_name = filter_query.substr(i, open_paren_pos - i);
+                StringUtils::trim(reference_collection_name);
+                if (!reference_collection_name.empty()) {
+                    reference_collection_names.insert(reference_collection_name);
+                }
+
+                i = open_paren_pos;
+                int parenthesis_count = 1;
+                while (++i < size && parenthesis_count > 0) {
+                    if (filter_query[i] == '(') {
+                        parenthesis_count++;
+                    } else if (filter_query[i] == ')') {
+                        parenthesis_count--;
+                    }
+                }
+            } else {
+                // A malformed filter may lack ':'; stop instead of scanning past the end.
+                while (++i < size && filter_query[i] != ':');
+                if (i >= size) {
+                    return;
+                }
+                bool in_backtick = false;
+                do {
+                    c = filter_query[++i];
+                    if (c == '`') {
+                        in_backtick = !in_backtick;
+                    }
+                } while (i < size && (in_backtick || (c != '(' && c != ')' &&
+                                                      !(c == '&' && filter_query[i + 1] == '&') &&
+                                                      !(c == '|' && filter_query[i + 1] == '|'))));
+            }
+        }
+    }
+}
+
 void initialize_include_fields_vec(const std::string& filter_query, std::vector<std::string>& include_fields_vec) {
     if (filter_query.empty()) {
         return;
     }
 
     std::set<std::string> reference_collection_names;
-    StringUtils::get_reference_collection_names(filter_query, reference_collection_names);
+    CollectionManager::_get_reference_collection_names(filter_query, reference_collection_names);
     if (reference_collection_names.empty()) {
         return;
     }
diff --git a/src/filter.cpp b/src/filter.cpp
index 5d94c66c..10354ea0 100644
--- a/src/filter.cpp
+++ b/src/filter.cpp
@@ -737,16 +737,16 @@ Option<bool> filter::parse_filter_query(const std::string& filter_query,
         return tokenize_op;
     }
 
-    if (tokens.size() > 100) {
-        return Option<bool>(400, "Filter expression is not valid.");
-    }
-
     std::queue<std::string> postfix;
     Option<bool> toPostfix_op = toPostfix(tokens, postfix);
     if (!toPostfix_op.ok()) {
         return toPostfix_op;
     }
 
+    if (postfix.size() > 100) {
+        return Option<bool>(400, "`filter_by` has too many operations.");
+    }
+
     Option<bool> toParseTree_op = toParseTree(postfix,
                                               root,
                                               search_schema,
diff --git a/src/filter_result_iterator.cpp b/src/filter_result_iterator.cpp
index 08933c70..a85721bb 100644
--- a/src/filter_result_iterator.cpp
+++ b/src/filter_result_iterator.cpp
@@ -1026,6 +1026,9 @@ void filter_result_iterator_t::init() {
             std::vector<std::string> str_tokens;
 
             while (tokenizer.next(str_token, token_index)) {
+                if (str_token.size() > 100) {
+                    str_token.erase(100);
+                }
                 str_tokens.push_back(str_token);
 
                 art_leaf* leaf = (art_leaf *) art_search(t, (const unsigned char*) str_token.c_str(),
diff --git a/src/string_utils.cpp b/src/string_utils.cpp
index d5401ee8..6b320fc2 100644
--- a/src/string_utils.cpp
+++ b/src/string_utils.cpp
@@ -582,54 +582,6 @@ size_t StringUtils::split_facet(const std::string &s, std::vector<std::string> &
     return std::min(end_index, s.size());
 }
 
-void StringUtils::get_reference_collection_names(const std::string& filter_query,
-                                                 std::set<std::string>& reference_collection_names) {
-    auto size = filter_query.size();
-    for (uint32_t i = 0; i < size;) {
-        auto c = filter_query[i];
-        if (c == ' ' || c == '(' || c == ')') {
-            i++;
-        } else if (c == '&' || c == '|') {
-            i += 2;
-        } else {
-            // Reference filter would start with $ symbol.
-            if (c == '$') {
-                auto open_paren_pos = filter_query.find('(', ++i);
-                if (open_paren_pos == std::string::npos) {
-                    return;
-                }
-
-                auto reference_collection_name = filter_query.substr(i, open_paren_pos - i);
-                StringUtils::trim(reference_collection_name);
-                if (!reference_collection_name.empty()) {
-                    reference_collection_names.insert(reference_collection_name);
-                }
-
-                i = open_paren_pos;
-                int parenthesis_count = 1;
-                while (++i < size && parenthesis_count > 0) {
-                    if (filter_query[i] == '(') {
-                        parenthesis_count++;
-                    } else if (filter_query[i] == ')') {
-                        parenthesis_count--;
-                    }
-                }
-            } else {
-                while (filter_query[++i] != ':');
-                bool in_backtick = false;
-                do {
-                    c = filter_query[++i];
-                    if (c == '`') {
-                        in_backtick = !in_backtick;
-                    }
-                } while (i < size && (in_backtick || (c != '(' && c != ')' &&
-                                                      !(c == '&' && filter_query[i + 1] == '&') &&
-                                                      !(c == '|' && filter_query[i + 1] == '|'))));
-            }
-        }
-    }
-}
-
 /*size_t StringUtils::unicode_length(const std::string& bytes) {
     std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> utf8conv;
     return utf8conv.from_bytes(bytes).size();
diff --git a/test/collection_manager_test.cpp b/test/collection_manager_test.cpp
index e821e51b..3c18db44 100644
--- a/test/collection_manager_test.cpp
+++ b/test/collection_manager_test.cpp
@@ -1245,3 +1245,38 @@ TEST_F(CollectionManagerTest, CloneCollection) {
     ASSERT_EQ('-', coll2->get_token_separators().at(0));
     ASSERT_EQ('?', coll2->get_token_separators().at(1));
 }
+
+TEST_F(CollectionManagerTest, GetReferenceCollectionNames) {
+    std::string filter_query = "";
+    std::set<std::string> reference_collection_names;
+    CollectionManager::_get_reference_collection_names(filter_query, reference_collection_names);
+    ASSERT_TRUE(reference_collection_names.empty());
+
+    filter_query = "foo:bar";
+    CollectionManager::_get_reference_collection_names(filter_query, reference_collection_names);
+    ASSERT_TRUE(reference_collection_names.empty());
+
+    filter_query = "$foo(bar:baz)";
+    std::vector<std::string> result = {"foo"};
+    CollectionManager::_get_reference_collection_names(filter_query, reference_collection_names);
+    ASSERT_EQ(1, reference_collection_names.size());
+    for (const auto &item: result) {
+        ASSERT_EQ(1, reference_collection_names.count(item));
+    }
+    reference_collection_names.clear();
+
+    filter_query = "((age: <5 || age: >10) && category:= [shoes]) &&"
+                   " $Customers(customer_id:=customer_a && (product_price:>100 && product_price:<200))";
+    result = {"Customers"};
+    CollectionManager::_get_reference_collection_names(filter_query, reference_collection_names);
+    ASSERT_EQ(1, reference_collection_names.size());
+    for (const auto &item: result) {
+        ASSERT_EQ(1, reference_collection_names.count(item));
+    }
+    reference_collection_names.clear();
+
+    // Malformed filter without a `:` must not read past the end of the query.
+    filter_query = "foo";
+    CollectionManager::_get_reference_collection_names(filter_query, reference_collection_names);
+    ASSERT_TRUE(reference_collection_names.empty());
+}
diff --git a/test/string_utils_test.cpp b/test/string_utils_test.cpp
index 6b931972..1ee9e6ac 100644
--- a/test/string_utils_test.cpp
+++ b/test/string_utils_test.cpp
@@ -420,33 +420,3 @@ TEST(StringUtilsTest, SplitIncludeFields) {
     tokens = {"id", "$Collection(title, pref*)", "count"};
     splitIncludeTestHelper(include_fields, tokens);
 }
-
-TEST(StringUtilsTest, GetReferenceCollectionNames) {
-    std::string filter_query = "";
-    std::set<std::string> reference_collection_names;
-    StringUtils::get_reference_collection_names(filter_query, reference_collection_names);
-    ASSERT_TRUE(reference_collection_names.empty());
-
-    filter_query = "foo:bar";
-    StringUtils::get_reference_collection_names(filter_query, reference_collection_names);
-    ASSERT_TRUE(reference_collection_names.empty());
-
-    filter_query = "$foo(bar:baz)";
-    std::vector<std::string> result = {"foo"};
-    StringUtils::get_reference_collection_names(filter_query, reference_collection_names);
-    ASSERT_EQ(1, reference_collection_names.size());
-    for (const auto &item: result) {
-        ASSERT_EQ(1, reference_collection_names.count(item));
-    }
-    reference_collection_names.clear();
-
-    filter_query = "((age: <5 || age: >10) && category:= [shoes]) &&"
-                   " $Customers(customer_id:=customer_a && (product_price:>100 && product_price:<200))";
-    result = {"Customers"};
-    StringUtils::get_reference_collection_names(filter_query, reference_collection_names);
-    ASSERT_EQ(1, reference_collection_names.size());
-    for (const auto &item: result) {
-        ASSERT_EQ(1, reference_collection_names.count(item));
-    }
-    reference_collection_names.clear();
-}