Parse filter query string.

2025-05-17 20:22:32 +08:00 · 2017-03-06 21:17:13 +05:30 · 2017-03-06 21:17:13 +05:30 · 96921be016
commit 96921be016
parent 0760e4d01b
9 changed files with 334 additions and 66 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -46,7 +46,7 @@ target_compile_definitions(search PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}/")
 target_compile_definitions(benchmark PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}/")
 target_compile_definitions(typesense_test PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}/")

-target_link_libraries(typesense-server for curl h2o-evloop pthread rocksdb ssl crypto)
+target_link_libraries(typesense-server for h2o-evloop pthread rocksdb ssl crypto)
 target_link_libraries(search for pthread rocksdb)
 target_link_libraries(benchmark for pthread rocksdb)
 target_link_libraries(typesense_test pthread for rocksdb gtest gtest_main)
--- a/TODO.md
+++ b/TODO.md
@ -31,8 +31,10 @@
 - ~~Assumption that all tokens match for scoring is no longer true~~
 - Handle searching for non-existing fields gracefully
 - Intersection without unpacking
- Facets
 - Filters
+- Facets
+- Iterator
+- Highlight
 - Support search operators like +, - etc.
 - Prefix-search strings should not be null terminated
 - string_utils::tokenize should not have max length
--- a/include/collection.h
+++ b/include/collection.h
@ -8,6 +8,7 @@
 #include <topster.h>
 #include <json.hpp>
 #include <field.h>
+#include <option.h>

 class Collection {
 private:
@ -41,7 +42,7 @@ private:

    size_t union_of_leaf_ids(std::vector<const art_leaf *> &leaves, uint32_t **results_out);

-    uint32_t do_filtering(uint32_t** filter_ids_out, const std::vector<filter> & filters);
+    Option<uint32_t> do_filtering(uint32_t** filter_ids_out, const std::string & simple_filter_str);

    void search(uint32_t* filter_ids, size_t filter_ids_length, std::string & query, const std::string & field,
                const int num_typos, const size_t num_results, Topster<100> & topster, size_t & num_found,
@ -92,7 +93,7 @@ public:

    std::string add(std::string json_str);

-    nlohmann::json search(std::string query, const std::vector<std::string> fields, const std::vector<filter> filters,
+    nlohmann::json search(std::string query, const std::vector<std::string> fields, const std::string & simple_filter_str,
                          const int num_typos, const size_t num_results, const token_ordering token_order = FREQUENCY,
                          const bool prefix = false);

--- a/include/field.h
+++ b/include/field.h
@ -2,6 +2,8 @@

 #include <string>
 #include "art.h"
+#include "option.h"
+#include "string_utils.h"

 namespace field_types {
    static const std::string STRING = "STRING";
@ -24,24 +26,40 @@ struct field {
    field(std::string name, std::string type): name(name), type(type) {

    }
+
+    // Returns true when `type` is one of the INT32, INT32_ARRAY, INT64 or INT64_ARRAY field types.
+    // Declared const so the predicate can also be used on const `field` objects and references.
+    bool integer() const {
+        return type == field_types::INT32 || type == field_types::INT32_ARRAY ||
+               type == field_types::INT64 || type == field_types::INT64_ARRAY;
+    }
 };

 struct filter {
    std::string field_name;
    std::vector<std::string> values;
-    std::string compare_operator;
+    NUM_COMPARATOR compare_operator;

-    NUM_COMPARATOR get_comparator() const {
-        if(compare_operator == "LESS_THAN") {
-            return LESS_THAN;
-        } else if(compare_operator == "LESS_THAN_EQUALS") {
-            return LESS_THAN_EQUALS;
-        } else if(compare_operator == "EQUALS") {
-            return EQUALS;
-        } else if(compare_operator == "GREATER_THAN") {
-            return GREATER_THAN;
-        } else {
-            return GREATER_THAN_EQUALS;
+    // Determines which numeric comparator a raw filter value such as "24", "<=24" or ">24" uses.
+    // Returns a failed Option (code 400) when the value neither is an integer nor starts with a
+    // recognised comparison symbol.
+    static Option<NUM_COMPARATOR> extract_num_comparator(const std::string & comp_and_value) {
+        // a bare integer carries no symbol and is treated as an equality match
+        if(StringUtils::is_integer(comp_and_value)) {
+            return Option<NUM_COMPARATOR>(EQUALS);
         }
+
+        // two-character symbols are listed first so that "<=" is never read as "<"
+        const struct {
+            const char* symbol;
+            size_t length;
+            NUM_COMPARATOR comparator;
+        } prefixes[] = {
+            {"<=", 2, LESS_THAN_EQUALS},
+            {">=", 2, GREATER_THAN_EQUALS},
+            {"<",  1, LESS_THAN},
+            {">",  1, GREATER_THAN}
+        };
+
+        for(const auto & prefix: prefixes) {
+            if(comp_and_value.compare(0, prefix.length, prefix.symbol) == 0) {
+                return Option<NUM_COMPARATOR>(prefix.comparator);
+            }
+        }
+
+        return Option<NUM_COMPARATOR>(400, "Numerical field has an invalid comparator.");
     }
 };
--- a/include/option.h
+++ b/include/option.h
@ -0,0 +1,35 @@
+#pragma once
+#include <stdint.h>
+#include <string>
+
+// Result wrapper that carries either a value of type T (success) or an
+// error code together with an error message (failure).
+template <typename T=uint32_t>
+class Option {
+private:
+
+    T value;
+    bool is_ok;
+
+    std::string error_msg;
+    uint32_t code;
+
+public:
+
+    // Success: stores a copy of `value`; the error message is empty and the code is zero.
+    // Members are initialised in declaration order so no field is left indeterminate.
+    Option(const T & value): value(value), is_ok(true), error_msg(), code(0) {
+
+    }
+
+    // Failure: stores `code` and `error_msg`; the held value is value-initialised
+    // so that get() never reads an indeterminate object.
+    Option(uint32_t code, const std::string & error_msg): value(), is_ok(false), error_msg(error_msg), code(code) {
+
+    }
+
+    // True when this Option was built from a value rather than from an error.
+    bool ok() const {
+        return is_ok;
+    }
+
+    // Returns a copy of the held value (value-initialised T when this Option is an error).
+    T get() const {
+        return value;
+    }
+
+    // Returns the error message (empty when this Option is a success).
+    std::string error() const {
+        return error_msg;
+    }
+};
--- a/include/string_utils.h
+++ b/include/string_utils.h
@ -1,6 +1,7 @@
 #pragma once

 #include <string>
+#include <sstream>

 struct StringUtils {

@ -42,4 +43,82 @@ struct StringUtils {
        }
        return str;
    }
+
+    // Originally adapted from: http://stackoverflow.com/a/236180/131050
+    //
+    // Splits `s` on every occurrence of `delim`, trims spaces from each piece and appends the
+    // pieces to `result`. Pieces that are empty after trimming are skipped unless `keep_empty`
+    // is true. When `delim` is empty, `s` is appended unchanged (untrimmed) as the only piece.
+    static void split(const std::string& s, std::vector<std::string> & result, const std::string& delim, const bool keep_empty = false) {
+        if (delim.empty()) {
+            result.push_back(s);
+            return ;
+        }
+
+        std::string::size_type piece_start = 0;
+        while (true) {
+            // std::string::find replaces the unqualified `search` call, which only resolved via ADL
+            const std::string::size_type piece_end = s.find(delim, piece_start);
+
+            // when no further delimiter exists the count overshoots and substr clamps it to the end
+            std::string piece = s.substr(piece_start, piece_end - piece_start);
+            trim(piece);
+
+            if (keep_empty || !piece.empty()) {
+                result.push_back(piece);
+            }
+            if (piece_end == std::string::npos) {
+                break;
+            }
+            piece_start = piece_end + delim.size();
+        }
+    }
+
+    // Originally adapted from: http://stackoverflow.com/a/36000453/131050
+    //
+    // Removes leading and trailing space characters (' ' only, no other whitespace) from `str`
+    // in place and returns a reference to the same string.
+    static std::string & trim(std::string & str) {
+        const std::string::size_type last_kept = str.find_last_not_of(' ');
+        if (last_kept == std::string::npos) {
+            // empty or made up entirely of spaces
+            str.clear();
+            return str;
+        }
+
+        // right trim first, then drop the leading run with a single erase instead of
+        // one erase per character (which shifted the whole string every time)
+        str.erase(last_kept + 1);
+        str.erase(0, str.find_first_not_of(' '));
+
+        return str;
+    }
+
+    // URL decoding - adapted from: http://stackoverflow.com/a/32595923/131050
+
+    // Converts a single hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its numeric value.
+    // The result is only meaningful for hexadecimal digits; callers must validate the input.
+    static char from_hex(char ch) {
+        // the character classification functions are undefined for negative values,
+        // so the character is widened through unsigned char first
+        const unsigned char digit = static_cast<unsigned char>(ch);
+        return isdigit(digit) ? digit - '0' : tolower(digit) - 'a' + 10;
+    }
+
+    // Decodes a URL-encoded string: "%XY" (two hexadecimal digits) becomes the byte 0xXY and
+    // '+' becomes a space. A '%' that is not followed by two hexadecimal digits is copied
+    // through unchanged instead of being dropped or decoded into garbage.
+    static std::string url_decode(std::string text) {
+        std::string decoded;
+        decoded.reserve(text.size());
+
+        for (auto i = text.begin(), n = text.end(); i != n; ++i) {
+            const char c = *i;
+
+            if (c == '%') {
+                // check the remaining length before touching i[1] / i[2] so that a '%' near
+                // the end of the input never reads past the end of the string
+                const bool has_hex_pair = (n - i) > 2 &&
+                                          isxdigit(static_cast<unsigned char>(i[1])) &&
+                                          isxdigit(static_cast<unsigned char>(i[2]));
+                if (has_hex_pair) {
+                    decoded += static_cast<char>(from_hex(i[1]) << 4 | from_hex(i[2]));
+                    i += 2;
+                } else {
+                    decoded += c;
+                }
+            } else if (c == '+') {
+                decoded += ' ';
+            } else {
+                decoded += c;
+            }
+        }
+
+        return decoded;
+    }
+
+    // Originally adapted from: http://stackoverflow.com/a/2845275/131050
+    //
+    // Returns true when `s` is an optional '+' or '-' sign followed by one or more decimal
+    // digits and nothing else (no spaces, no embedded NUL). Only the syntax is checked: the
+    // caller remains responsible for making sure the number fits the target integer type.
+    static bool is_integer(const std::string &s) {
+        if(s.empty()) {
+            return false;
+        }
+
+        std::string::size_type pos = (s[0] == '-' || s[0] == '+') ? 1 : 0;
+        if(pos == s.size()) {
+            // a lone sign is not a number
+            return false;
+        }
+
+        // plain range comparison: avoids calling isdigit with a possibly negative char
+        for(; pos < s.size(); pos++) {
+            if(s[pos] < '0' || s[pos] > '9') {
+                return false;
+            }
+        }
+
+        return true;
+    }
 };
--- a/src/collection.cpp
+++ b/src/collection.cpp
@ -266,7 +266,81 @@ size_t Collection::union_of_leaf_ids(std::vector<const art_leaf *> &leaves, uint
    return results_length;
 }

-uint32_t Collection::do_filtering(uint32_t** filter_ids_out, const std::vector<filter> & filters) {
+Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::string & simple_filter_str) {
+    // parse the filter string
+    std::vector<std::string> filter_blocks;
+    StringUtils::split(simple_filter_str, filter_blocks, "&&");
+
+    std::vector<filter> filters;
+
+    for(const std::string & filter_block: filter_blocks) {
+        // split into [field_name, value]
+        std::vector<std::string> expression_parts;
+        StringUtils::split(filter_block, expression_parts, ":");
+        if(expression_parts.size() != 2) {
+            return Option<>(400, "Could not parse the filter query.");
+        }
+
+        const std::string & field_name = expression_parts[0];
+        if(schema.count(field_name) == 0) {
+            return Option<>(400, "Could not find a filter field named `" + field_name + "` in the schema.");
+        }
+
+        field _field = schema.at(field_name);
+        const std::string & raw_value = expression_parts[1];
+        filter f;
+
+        if(_field.integer()) {
+            // could be a single value or a list
+            if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') {
+                std::vector<std::string> filter_values;
+                StringUtils::split(raw_value.substr(1, raw_value.size() - 2), filter_values, ",");
+
+                for(const std::string & filter_value: filter_values) {
+                    if(!StringUtils::is_integer(filter_value)) {
+                        return Option<>(400, "Error with field `" + _field.name + "`: Not an integer.");
+                    }
+                }
+
+                f = {field_name, filter_values, EQUALS};
+            } else {
+                Option<NUM_COMPARATOR> op_comparator = filter::extract_num_comparator(raw_value);
+                if(!op_comparator.ok()) {
+                    return Option<>(400, "Error with field `" + _field.name + "`: " + op_comparator.error());
+                }
+
+                // extract numerical value
+                std::string filter_value;
+                if(op_comparator.get() == LESS_THAN || op_comparator.get() == GREATER_THAN) {
+                    filter_value = raw_value.substr(1);
+                } else if(op_comparator.get() == LESS_THAN_EQUALS || op_comparator.get() == GREATER_THAN_EQUALS) {
+                    filter_value = raw_value.substr(2);
+                } else {
+                    // EQUALS
+                    filter_value = raw_value;
+                }
+
+                filter_value = StringUtils::trim(filter_value);
+
+                if(!StringUtils::is_integer(filter_value)) {
+                    return Option<>(400, "Error with field `" + _field.name + "`: Not an integer.");
+                }
+
+                f = {field_name, {filter_value}, op_comparator.get()};
+            }
+        } else {
+            if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') {
+                std::vector<std::string> filter_values;
+                StringUtils::split(raw_value.substr(1, raw_value.size() - 2), filter_values, ",");
+                f = {field_name, filter_values, EQUALS};
+            } else {
+                f = {field_name, {raw_value}, EQUALS};
+            }
+        }
+
+        filters.push_back(f);
+    }
+
    uint32_t* filter_ids = nullptr;
    uint32_t filter_ids_length = 0;

@ -277,17 +351,14 @@ uint32_t Collection::do_filtering(uint32_t** filter_ids_out, const std::vector<f
            field f = schema.at(a_filter.field_name);
            std::vector<const art_leaf*> leaves;

-            if(f.type == field_types::INT32 || f.type == field_types::INT32_ARRAY ||
-               f.type == field_types::INT64 || f.type == field_types::INT64_ARRAY) {
+            if(f.integer()) {
                for(const std::string & filter_value: a_filter.values) {
                    if(f.type == field_types::INT32 || f.type == field_types::INT32_ARRAY) {
                        int32_t value = (int32_t) std::stoi(filter_value);
-                        NUM_COMPARATOR comparator = a_filter.get_comparator();
-                        art_int32_search(t, value, comparator, leaves);
+                        art_int32_search(t, value, a_filter.compare_operator, leaves);
                    } else {
                        int64_t value = (int64_t) std::stoi(filter_value);
-                        NUM_COMPARATOR comparator = a_filter.get_comparator();
-                        art_int64_search(t, value, comparator, leaves);
+                        art_int64_search(t, value, a_filter.compare_operator, leaves);
                    }
                }
            } else if(f.type == field_types::STRING || f.type == field_types::STRING_ARRAY) {
@ -316,17 +387,25 @@ uint32_t Collection::do_filtering(uint32_t** filter_ids_out, const std::vector<f
    }

    *filter_ids_out = filter_ids;
-    return filter_ids_length;
+    return Option<>(filter_ids_length);
 }

-nlohmann::json Collection::search(std::string query, const std::vector<std::string> fields, const std::vector<filter> filters,
+nlohmann::json Collection::search(std::string query, const std::vector<std::string> fields,
+                                  const std::string & simple_filter_str,
                                  const int num_typos, const size_t num_results,
                                  const token_ordering token_order, const bool prefix) {
    size_t num_found = 0;
+    nlohmann::json result = nlohmann::json::object();

    // process the filters first
    uint32_t* filter_ids = nullptr;
-    uint32_t filter_ids_length = do_filtering(&filter_ids, filters);
+    Option<uint32_t> op_filter_ids_length = do_filtering(&filter_ids, simple_filter_str);
+    if(!op_filter_ids_length.ok()) {
+        result["error"] = op_filter_ids_length.error();
+        return result;
+    }
+
+    const uint32_t filter_ids_length = op_filter_ids_length.get();

    // Order of `fields` are used to rank results
    auto begin = std::chrono::high_resolution_clock::now();
@ -336,7 +415,7 @@ nlohmann::json Collection::search(std::string query, const std::vector<std::stri
        Topster<100> topster;
        const std::string & field = fields[i];
        // proceed to query search only when no filters are provided or when filtering produces results
-        if(filters.size() == 0 || filter_ids_length > 0) {
+        if(simple_filter_str.size() == 0 || filter_ids_length > 0) {
            search(filter_ids, filter_ids_length, query, field, num_typos, num_results,
                   topster, num_found, token_order, prefix);
            topster.sort();
@ -358,7 +437,6 @@ nlohmann::json Collection::search(std::string query, const std::vector<std::stri
        return a.second.key > b.second.key;
    });

-    nlohmann::json result = nlohmann::json::object();
    result["hits"] = nlohmann::json::array();

    for(auto field_order_kv: field_order_kvs) {
--- a/src/main/server.cpp
+++ b/src/main/server.cpp
@ -28,7 +28,7 @@
 static h2o_globalconf_t config;
 static h2o_context_t ctx;
 static h2o_accept_ctx_t accept_ctx;
-std::vector<field> search_fields = {field("title", field_types::STRING)};
+std::vector<field> search_fields = {field("title", field_types::STRING), field("points", field_types::INT32)};
 std::vector<std::string> rank_fields = {"points"};
 Store *store = new Store("/tmp/typesense-data");

@ -52,14 +52,18 @@ std::map<std::string, std::string> parse_query(const std::string& query) {

    for (std::sregex_iterator i = words_begin; i != words_end; i++) {
        std::string key = (*i)[1].str();
-        std::string value = (*i)[2].str();
-        query_map[key] = StringUtils::replace_all(value, "%20", " ");
+        // URL-decode the raw value. When the same key appears more than once its values are
+        // accumulated, joined by "&&". The lookup must be done on the key: testing the value
+        // (as before) meant a repeated key simply overwrote its earlier value.
+        const std::string value = StringUtils::url_decode((*i)[2].str());
+        auto existing = query_map.find(key);
+        if(existing == query_map.end()) {
+            query_map[key] = value;
+        } else {
+            existing->second += "&&" + value;
+        }
    }

    return query_map;
 }

-
 static int get_search(h2o_handler_t *self, h2o_req_t *req) {
    static h2o_generator_t generator = {NULL, NULL};
    h2o_iovec_t query = req->query_at != SIZE_MAX ?
@ -71,6 +75,7 @@ static int get_search(h2o_handler_t *self, h2o_req_t *req) {
    const char *NUM_TYPOS = "num_typos";
    const char *PREFIX = "prefix";
    const char *TOKEN_ORDERING = "token_ordering";
+    const char *FILTERS = "filters";

    if(query_map.count(NUM_TYPOS) == 0) {
        query_map[NUM_TYPOS] = "2";
@ -84,6 +89,9 @@ static int get_search(h2o_handler_t *self, h2o_req_t *req) {
        query_map[TOKEN_ORDERING] = "FREQUENCY";
    }

+    std::string filter_str = query_map.count(FILTERS) != 0 ? query_map[FILTERS] : "";
+    std::cout << "filter_str: " << filter_str << std::endl;
+
    token_ordering token_order = (query_map[TOKEN_ORDERING] == "MAX_SCORE") ? MAX_SCORE : FREQUENCY;

    //printf("Query: %s\n", query_map["q"].c_str());
@ -91,7 +99,7 @@ static int get_search(h2o_handler_t *self, h2o_req_t *req) {

    std::vector<std::string> search_fields = {"title"};

-    nlohmann::json result = collection->search(query_map["q"], search_fields, {}, std::stoi(query_map[NUM_TYPOS]),
+    nlohmann::json result = collection->search(query_map["q"], search_fields, filter_str, std::stoi(query_map[NUM_TYPOS]),
                                               100, token_order, false);
    std::string json_str = result.dump();
    //std::cout << "JSON:" << json_str << std::endl;
--- a/test/collection_test.cpp
+++ b/test/collection_test.cpp
@ -402,7 +402,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {

    // Plain search with no filters - results should be sorted by rank fields
    search_fields = {"name"};
-    nlohmann::json results = coll_array_fields->search("Jeremy", search_fields, {}, 0, 10, FREQUENCY, false);
+    nlohmann::json results = coll_array_fields->search("Jeremy", search_fields, "", 0, 10, FREQUENCY, false);
    ASSERT_EQ(5, results["hits"].size());

    std::vector<std::string> ids = {"3", "1", "4", "0", "2"};
@ -415,9 +415,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
    }

    // Searching on an int32 field
-    std::vector<filter> filters = {(filter) {"age", {"24"}, "GREATER_THAN"}};
-
-    results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    results = coll_array_fields->search("Jeremy", search_fields, "age:>24", 0, 10, FREQUENCY, false);
    ASSERT_EQ(3, results["hits"].size());

    ids = {"3", "1", "4"};
@ -429,17 +427,14 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

-    filters = {(filter) {"age", {"24"}, "GREATER_THAN_EQUALS"}};
-    results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    results = coll_array_fields->search("Jeremy", search_fields, "age:>=24", 0, 10, FREQUENCY, false);
    ASSERT_EQ(4, results["hits"].size());

-    filters = {(filter) {"age", {"24"}, "EQUALS"}};
-    results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    results = coll_array_fields->search("Jeremy", search_fields, "age:24", 0, 10, FREQUENCY, false);
    ASSERT_EQ(1, results["hits"].size());

    // Searching a number against an int32 array field
-    filters = {(filter) {"years", {"2002"}, "GREATER_THAN"}};
-    results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    results = coll_array_fields->search("Jeremy", search_fields, "years:>2002", 0, 10, FREQUENCY, false);
    ASSERT_EQ(3, results["hits"].size());

    ids = {"1", "0", "2"};
@ -450,8 +445,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

-    filters = {(filter) {"years", {"1989"}, "LESS_THAN"}};
-    results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    results = coll_array_fields->search("Jeremy", search_fields, "years:<1989", 0, 10, FREQUENCY, false);
    ASSERT_EQ(1, results["hits"].size());

    ids = {"3"};
@ -463,8 +457,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
    }

    // multiple filters
-    filters = {(filter) {"years", {"2005"}, "LESS_THAN"}, (filter) {"years", {"1987"}, "GREATER_THAN"}};
-    results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    results = coll_array_fields->search("Jeremy", search_fields, "years:<2005 && years:>1987", 0, 10, FREQUENCY, false);
    ASSERT_EQ(1, results["hits"].size());

    ids = {"4"};
@ -476,8 +469,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
    }

    // multiple search values (works like SQL's IN operator) against a single int field
-    filters = {(filter) {"age", {"21", "24", "63"}, "EQUALS"}};
-    results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    results = coll_array_fields->search("Jeremy", search_fields, "age:[21, 24, 63]", 0, 10, FREQUENCY, false);
    ASSERT_EQ(3, results["hits"].size());

    ids = {"3", "0", "2"};
@ -488,9 +480,8 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

-    // multiple search values against an int32 array field
-    filters = {(filter) {"years", {"2015", "1985", "1999"}, "EQUALS"}};
-    results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    // multiple search values against an int32 array field - also use extra padding between symbols
+    results = coll_array_fields->search("Jeremy", search_fields, "years : [ 2015, 1985 , 1999]", 0, 10, FREQUENCY, false);
    ASSERT_EQ(4, results["hits"].size());

    ids = {"3", "1", "4", "0"};
@ -501,10 +492,8 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

-    // searching on an int64 array field
-    filters = {(filter) {"timestamps", {"475205222"}, "GREATER_THAN"}};
-
-    results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    // searching on an int64 array field - also ensure that padded space causes no issues
+    results = coll_array_fields->search("Jeremy", search_fields, "timestamps : > 475205222", 0, 10, FREQUENCY, false);
    ASSERT_EQ(4, results["hits"].size());

    ids = {"1", "4", "0", "2"};
@ -517,8 +506,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
    }

    // when filters don't match any record, no results should be returned
-    filters = {(filter) {"timestamps", {"1"}, "LESS_THAN"}};
-    results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    results = coll_array_fields->search("Jeremy", search_fields, "timestamps:<1", 0, 10, FREQUENCY, false);
    ASSERT_EQ(0, results["hits"].size());

    collectionManager.drop_collection("coll_array_fields");
@ -547,9 +535,7 @@ TEST_F(CollectionTest, FilterOnTextFields) {
    infile.close();

    search_fields = {"name"};
-    std::vector<filter> filters = {(filter) {"tags", {"gold"}, "EQUALS"}};
-
-    nlohmann::json results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    nlohmann::json results = coll_array_fields->search("Jeremy", search_fields, "tags: gold", 0, 10, FREQUENCY, false);
    ASSERT_EQ(4, results["hits"].size());

    std::vector<std::string> ids = {"1", "4", "0", "2"};
@ -561,9 +547,7 @@ TEST_F(CollectionTest, FilterOnTextFields) {
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

-    filters = {(filter) {"tags", {"bronze"}, "EQUALS"}};
-
-    results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    results = coll_array_fields->search("Jeremy", search_fields, "tags : bronze", 0, 10, FREQUENCY, false);
    ASSERT_EQ(2, results["hits"].size());

    ids = {"4", "2"};
@ -575,10 +559,73 @@ TEST_F(CollectionTest, FilterOnTextFields) {
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

+    // search with a list of tags, also testing extra padding of space
+    results = coll_array_fields->search("Jeremy", search_fields, "tags: [bronze,   silver]", 0, 10, FREQUENCY, false);
+    ASSERT_EQ(4, results["hits"].size());
+
+    ids = {"3", "4", "0", "2"};
+
+    for(size_t i = 0; i < results["hits"].size(); i++) {
+        nlohmann::json result = results["hits"].at(i);
+        std::string result_id = result["id"];
+        std::string id = ids.at(i);
+        ASSERT_STREQ(id.c_str(), result_id.c_str());
+    }
+
    // should be exact matches (no normalization or fuzzy searching should happen)
-    filters = {(filter) {"tags", {"BRONZE"}, "EQUALS"}};
-    results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
+    results = coll_array_fields->search("Jeremy", search_fields, "tags: BRONZE", 0, 10, FREQUENCY, false);
    ASSERT_EQ(0, results["hits"].size());

    collectionManager.drop_collection("coll_array_fields");
+}
+
+// Verifies that malformed filter strings are handled without crashing: every search below
+// must come back with zero hits.
+TEST_F(CollectionTest, HandleBadlyFormedFilterQuery) {
+    // should not crash when filter query is malformed!
+    Collection *coll_array_fields;
+
+    std::ifstream infile(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
+    std::vector<field> fields = {field("name", field_types::STRING), field("age", field_types::INT32),
+                                 field("years", field_types::INT32_ARRAY),
+                                 field("timestamps", field_types::INT64_ARRAY),
+                                 field("tags", field_types::STRING_ARRAY)};
+    std::vector<std::string> rank_fields = {"age"};
+
+    // reuse the collection when it already exists, otherwise create it
+    coll_array_fields = collectionManager.get_collection("coll_array_fields");
+    if(coll_array_fields == nullptr) {
+        coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, rank_fields);
+    }
+
+    std::string json_line;
+
+    // index one document per line of the fixture file
+    while (std::getline(infile, json_line)) {
+        coll_array_fields->add(json_line);
+    }
+
+    infile.close();
+
+    search_fields = {"name"};
+
+    // when filter field does not exist in the schema
+    nlohmann::json results = coll_array_fields->search("Jeremy", search_fields, "tagzz: gold", 0, 10, FREQUENCY, false);
+    ASSERT_EQ(0, results["hits"].size());
+
+    // searching using a string for a numeric field
+    results = coll_array_fields->search("Jeremy", search_fields, "age: abcdef", 0, 10, FREQUENCY, false);
+    ASSERT_EQ(0, results["hits"].size());
+
+    // searching using a string for a numeric array field
+    results = coll_array_fields->search("Jeremy", search_fields, "timestamps: abcdef", 0, 10, FREQUENCY, false);
+    ASSERT_EQ(0, results["hits"].size());
+
+    // malformed k:v syntax
+    results = coll_array_fields->search("Jeremy", search_fields, "timestamps abcdef", 0, 10, FREQUENCY, false);
+    ASSERT_EQ(0, results["hits"].size());
+
+    // just empty spaces
+    results = coll_array_fields->search("Jeremy", search_fields, "  ", 0, 10, FREQUENCY, false);
+    ASSERT_EQ(0, results["hits"].size());
+
+    // wrapping number with quotes
+    results = coll_array_fields->search("Jeremy", search_fields, "age: '21'", 0, 10, FREQUENCY, false);
+    ASSERT_EQ(0, results["hits"].size());
+
+    // clean up like the sibling filter tests do, so the indexed documents do not leak into later tests
+    collectionManager.drop_collection("coll_array_fields");
 }