Collection operations on float fields.

2025-05-16 19:55:21 +08:00 · 2017-08-10 18:20:58 -04:00 · 2017-08-10 18:20:58 -04:00 · e384b777a1
commit e384b777a1
parent a2f475d7fc
9 changed files with 317 additions and 41 deletions
--- a/TODO.md
+++ b/TODO.md
@ -48,18 +48,26 @@
 - ~~Fetch an individual document~~
 - ~~ID field should be a string: must validate~~
 - ~~Number of records in collection~~
+- ~~Test for asc/desc upper/lower casing~~
+- ~~Test for search without any sort_by given~~
+- ~~Test for collection creation validation~~
+- ~~Test for delete document~~
+- ~~art float search~~
+- When prefix=true, use token_ranking_field for token ordering only for last word
+- only last token should be prefix searched
+- test for token ranking on float field
+- test for float int field deletion during doc deletion
+- Prefix-search strings should not be null terminated
+- > INT32_MAX validation for float field
+- art bool support
+- Proper logging
 - Add docs/explanation around ranking calc
 - Use rocksdb batch put for atomic insertion
- When prefix=true, use token_ranking_field for token ordering only for last word
 - Query token ids should match query token ordering
 - ID should not have "/"
 - Group results by field
 - Handle store-get() not finding a key
 - Delete using range: https://github.com/facebook/rocksdb/wiki/Delete-A-Range-Of-Keys
- ~~Test for asc/desc upper/lower casing~~
- ~~Test for search without any sort_by given~~
- ~~Test for collection creation validation~~
- ~~Test for delete document~~
 - Test for sorted_array::indexOf when length is 0
 - Test for snippets
 - Test for pagination
@ -70,18 +78,12 @@
 - UTF-8 support for fuzzy search
 - Handle searching for non-existing fields gracefully
 - test for same match score but different primary, secondary attr
- only last token should be prefix searched
- Intersection without unpacking
 - Support nested fields via "."
 - Support search operators like +, - etc.
- Prefix-search strings should not be null terminated
 - string_utils::tokenize should not have max length
- art float search
- Benchmark with -ffast-math
 - Space sensitivity
- Use bitmap index instead of compressed array for doc list
- Primary_rank_scores and secondary_rank_scores hashmaps should be combined
- Proper logging
+- Use bitmap index instead of compressed array for doc list?
+- Primary_rank_scores and secondary_rank_scores hashmaps should be combined?
 - d-ary heap?

 **API**
@ -105,4 +107,5 @@

 **Tech debt**

- ~~Use GLOB file pattern for CMake (better IDE refactoring support)~~
+- ~~Use GLOB file pattern for CMake (better IDE refactoring support)~~
+- DRY index_int64_field* methods
--- a/include/collection.h
+++ b/include/collection.h
@ -103,14 +103,18 @@ private:
    void index_string_array_field(const std::vector<std::string> & strings, const uint32_t score, art_tree *t,
                                  uint32_t seq_id, const bool verbatim) const;

-    void index_int32_field(const int32_t value, uint32_t score, art_tree *t, uint32_t seq_id) const;
+    void index_int32_field(const int32_t value, const uint32_t score, art_tree *t, uint32_t seq_id) const;

-    void index_int64_field(const int64_t value, uint32_t score, art_tree *t, uint32_t seq_id) const;
+    void index_int64_field(const int64_t value, const uint32_t score, art_tree *t, uint32_t seq_id) const;
+
+    void index_float_field(const float value, const uint32_t score, art_tree *t, uint32_t seq_id) const;

    void index_int32_array_field(const std::vector<int32_t> & values, const uint32_t score, art_tree *t, uint32_t seq_id) const;

    void index_int64_array_field(const std::vector<int64_t> & values, const uint32_t score, art_tree *t, uint32_t seq_id) const;

+    void index_float_array_field(const std::vector<float> & values, const uint32_t score, art_tree *t, uint32_t seq_id) const;
+
    void remove_and_shift_offset_index(sorted_array &offset_index, const uint32_t *indices_sorted,
                                       const uint32_t indices_length);

--- a/include/field.h
+++ b/include/field.h
@ -9,6 +9,8 @@ namespace field_types {
    static const std::string STRING = "STRING";
    static const std::string INT32 = "INT32";
    static const std::string INT64 = "INT64";
+    static const std::string FLOAT = "FLOAT";
+    static const std::string FLOAT_ARRAY = "FLOAT_ARRAY";
    static const std::string STRING_ARRAY = "STRING_ARRAY";
    static const std::string INT32_ARRAY = "INT32_ARRAY";
    static const std::string INT64_ARRAY = "INT64_ARRAY";
@ -27,9 +29,17 @@ struct field {

    }

-    bool integer() {
-        return type == field_types::INT32 || type == field_types::INT32_ARRAY ||
-               type == field_types::INT64 || type == field_types::INT64_ARRAY;
+    // True when this field holds integers: a single INT32/INT64 value or an array of them.
+    // Declared const because it only reads `type`, so it can be called on const fields too.
+    bool is_integer() const {
+        return (type == field_types::INT32 || type == field_types::INT32_ARRAY ||
+               type == field_types::INT64 || type == field_types::INT64_ARRAY);
+    }
+
+    // True when this field holds floats: a single FLOAT value or a FLOAT_ARRAY.
+    // Declared const because it only reads `type`, so it can be called on const fields too.
+    bool is_float() const {
+        return (type == field_types::FLOAT || type == field_types::FLOAT_ARRAY);
+    }
+
+    // True when this field holds text: a single STRING value or a STRING_ARRAY.
+    // Declared const because it only reads `type`, so it can be called on const fields too.
+    bool is_string() const {
+        return (type == field_types::STRING || type == field_types::STRING_ARRAY);
    }
 };

--- a/include/string_utils.h
+++ b/include/string_utils.h
@ -72,6 +72,28 @@ struct StringUtils {
        return escaped.str();
    }

+    // Returns true when `s` is a plain decimal literal: an optional leading '+' or '-', digits with at most
+    // one '.', and an optional 'f' suffix that is accepted only as the very last character and only after a '.'
+    // (e.g. "3", "-2.78", "+.5", "1.f"). Anything else (letters, whitespace, a second '.') is rejected.
+    // At least one digit is required, so "", "+", "." and ".f" are NOT floats.
+    // Adapted from: https://stackoverflow.com/a/19751887/131050
+    static bool is_float(const std::string &s) {
+        std::string::const_iterator it = s.begin();
+        bool decimalPoint = false;
+        bool seenDigit = false;
+
+        // optional sign
+        if(it != s.end() && (*it == '-' || *it == '+')) {
+            ++it;
+        }
+
+        while(it != s.end()) {
+            if(*it == '.') {
+                if(decimalPoint) break;  // a second decimal point is invalid
+                decimalPoint = true;
+            } else if(std::isdigit(static_cast<unsigned char>(*it))) {
+                // cast first: passing a negative char to std::isdigit is undefined behaviour
+                seenDigit = true;
+            } else if(*it != 'f' || it + 1 != s.end() || !decimalPoint) {
+                // the only non-digit tolerated is a trailing 'f' that follows a decimal point
+                break;
+            }
+            ++it;
+        }
+
+        return seenDigit && it == s.end();
+    }
+
    // Adapted from: http://stackoverflow.com/a/2845275/131050
    static bool is_integer(const std::string &s) {
        if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) {
--- a/src/collection.cpp
+++ b/src/collection.cpp
@ -85,7 +85,7 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
    }

    if(!token_ranking_field.empty() && !document[token_ranking_field].is_number()) {
-        return Option<>(400, "Token ranking field `" + token_ranking_field  + "` must be an INT32.");
+        return Option<>(400, "Token ranking field `" + token_ranking_field  + "` must be a number.");
    }

    if(!token_ranking_field.empty() && document[token_ranking_field].get<int64_t>() > INT32_MAX) {
@ -114,7 +114,7 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
            const std::string & text = document[field_name];
            index_string_field(text, points, t, seq_id, false);
        } else if(field_pair.second.type == field_types::INT32) {
-            if(!document[field_name].is_number()) {
+            if(!document[field_name].is_number_integer()) {
                return Option<>(400, "Search field `" + field_name  + "` must be an INT32.");
            }

@ -125,12 +125,19 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
            uint32_t value = document[field_name];
            index_int32_field(value, points, t, seq_id);
        } else if(field_pair.second.type == field_types::INT64) {
-            if(!document[field_name].is_number()) {
+            if(!document[field_name].is_number_integer()) {
                return Option<>(400, "Search field `" + field_name  + "` must be an INT64.");
            }

            uint64_t value = document[field_name];
            index_int64_field(value, points, t, seq_id);
+        } else if(field_pair.second.type == field_types::FLOAT) {
+            if(!document[field_name].is_number_float()) {
+                return Option<>(400, "Search field `" + field_name  + "` must be a FLOAT.");
+            }
+
+            float value = document[field_name];
+            index_float_field(value, points, t, seq_id);
        } else if(field_pair.second.type == field_types::STRING_ARRAY) {
            if(!document[field_name].is_array()) {
                return Option<>(400, "Search field `" + field_name  + "` must be a STRING_ARRAY.");
@ -147,7 +154,7 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
                return Option<>(400, "Search field `" + field_name  + "` must be an INT32_ARRAY.");
            }

-            if(document[field_name].size() > 0 && !document[field_name][0].is_number()) {
+            if(document[field_name].size() > 0 && !document[field_name][0].is_number_integer()) {
                return Option<>(400, "Search field `" + field_name  + "` must be an INT32_ARRAY.");
            }

@ -158,12 +165,23 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
                return Option<>(400, "Search field `" + field_name  + "` must be an INT64_ARRAY.");
            }

-            if(document[field_name].size() > 0 && !document[field_name][0].is_number()) {
+            if(document[field_name].size() > 0 && !document[field_name][0].is_number_integer()) {
                return Option<>(400, "Search field `" + field_name  + "` must be an INT64_ARRAY.");
            }

            std::vector<int64_t> values = document[field_name];
            index_int64_array_field(values, points, t, seq_id);
+        } else if(field_pair.second.type == field_types::FLOAT_ARRAY) {
+            if(!document[field_name].is_array()) {
+                return Option<>(400, "Search field `" + field_name  + "` must be an FLOAT_ARRAY.");
+            }
+
+            if(document[field_name].size() > 0 && !document[field_name][0].is_number_float()) {
+                return Option<>(400, "Search field `" + field_name  + "` must be an FLOAT_ARRAY.");
+            }
+
+            std::vector<float> values = document[field_name];
+            index_float_array_field(values, points, t, seq_id);
        }
    }

@ -260,6 +278,30 @@ void Collection::index_int64_field(const int64_t value, uint32_t score, art_tree
    art_insert(t, key, KEY_LEN, &art_doc, num_hits);
 }

+// Indexes a single float `value` of document `seq_id` into the ART index `t`.
+// The value is converted into a fixed-width byte key by encode_float(); the document is then inserted under
+// that key with no token offsets. The hit count handed to art_insert() is the number of ids already stored
+// on the matching leaf (zero when the key is new) plus one for this document.
+void Collection::index_float_field(const float value, uint32_t score, art_tree *t, uint32_t seq_id) const {
+    const int KEY_LEN = 8;
+    unsigned char encoded_key[KEY_LEN];
+    encode_float(value, encoded_key);
+
+    // Look the key up first so the new entry carries the updated hit count.
+    art_leaf* existing_leaf = (art_leaf *) art_search(t, encoded_key, KEY_LEN);
+    uint32_t total_hits = 1;
+    if(existing_leaf != nullptr) {
+        total_hits += existing_leaf->values->ids.getLength();
+    }
+
+    // Numeric values have no positions inside a text, hence no offsets.
+    art_document doc_entry;
+    doc_entry.offsets = nullptr;
+    doc_entry.offsets_len = 0;
+    doc_entry.score = score;
+    doc_entry.id = seq_id;
+
+    art_insert(t, encoded_key, KEY_LEN, &doc_entry, total_hits);
+}
+
+
 void Collection::index_string_field(const std::string & text, const uint32_t score, art_tree *t,
                                    uint32_t seq_id, const bool verbatim) const {
    std::vector<std::string> tokens;
@ -327,6 +369,13 @@ void Collection::index_int64_array_field(const std::vector<int64_t> & values, co
    }
 }

+// Indexes every element of a FLOAT_ARRAY field by delegating to index_float_field(), so each value
+// gets its own key in the ART index `t` for document `seq_id`.
+// `score` is a uint32_t to match the declaration in collection.h and the parameter of index_float_field();
+// typing it as float would not match the declared member and would round-trip the score through float.
+void Collection::index_float_array_field(const std::vector<float> & values, const uint32_t score, art_tree *t,
+                             uint32_t seq_id) const {
+    for(const float value: values) {
+        index_float_field(value, score, t, seq_id);
+    }
+}
+
 void Collection::do_facets(std::vector<facet> & facets, uint32_t* result_ids, size_t results_size) {
    for(auto & a_facet: facets) {
        // assumed that facet fields have already been validated upstream
@ -466,16 +515,20 @@ Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::
        const std::string & raw_value = expression_parts[1];
        filter f;

-        if(_field.integer()) {
+        if(_field.is_integer() || _field.is_float()) {
            // could be a single value or a list
            if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') {
                std::vector<std::string> filter_values;
                StringUtils::split(raw_value.substr(1, raw_value.size() - 2), filter_values, ",");

                for(const std::string & filter_value: filter_values) {
-                    if(!StringUtils::is_integer(filter_value)) {
+                    if(_field.is_integer() && !StringUtils::is_integer(filter_value)) {
                        return Option<>(400, "Error with field `" + _field.name + "`: Not an integer.");
                    }
+
+                    if(_field.is_float() && !StringUtils::is_float(filter_value)) {
+                        return Option<>(400, "Error with field `" + _field.name + "`: Not a float.");
+                    }
                }

                f = {field_name, filter_values, EQUALS};
@ -498,13 +551,17 @@ Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::

                filter_value = StringUtils::trim(filter_value);

-                if(!StringUtils::is_integer(filter_value)) {
+                if(_field.is_integer() && !StringUtils::is_integer(filter_value)) {
                    return Option<>(400, "Error with field `" + _field.name + "`: Not an integer.");
                }

+                if(_field.is_float() && !StringUtils::is_float(filter_value)) {
+                    return Option<>(400, "Error with field `" + _field.name + "`: Not a float.");
+                }
+
                f = {field_name, {filter_value}, op_comparator.get()};
            }
-        } else {
+        } else if(_field.is_string()) {
            if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') {
                std::vector<std::string> filter_values;
                StringUtils::split(raw_value.substr(1, raw_value.size() - 2), filter_values, ",");
@ -512,6 +569,8 @@ Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::
            } else {
                f = {field_name, {raw_value}, EQUALS};
            }
+        } else {
+            return Option<>(400, "Error with field `" + _field.name + "`: Unidentified field type.");
        }

        filters.push_back(f);
@ -527,7 +586,7 @@ Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::
            field f = search_schema.at(a_filter.field_name);
            std::vector<const art_leaf*> leaves;

-            if(f.integer()) {
+            if(f.is_integer()) {
                for(const std::string & filter_value: a_filter.values) {
                    if(f.type == field_types::INT32 || f.type == field_types::INT32_ARRAY) {
                        int32_t value = (int32_t) std::stoi(filter_value);
@ -537,7 +596,12 @@ Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::
                        art_int64_search(t, value, a_filter.compare_operator, leaves);
                    }
                }
-            } else if(f.type == field_types::STRING || f.type == field_types::STRING_ARRAY) {
+            } else if(f.is_float()) {
+                for(const std::string & filter_value: a_filter.values) {
+                    float value = (float) std::atof(filter_value.c_str());
+                    art_float_search(t, value, a_filter.compare_operator, leaves);
+                }
+            } else if(f.is_string()) {
                for(const std::string & filter_value: a_filter.values) {
                    art_leaf* leaf = (art_leaf *) art_search(t, (const unsigned char*) filter_value.c_str(), filter_value.length()+1);
                    if(leaf != nullptr) {
@ -1153,6 +1217,7 @@ Option<std::string> Collection::remove(const std::string & id) {
    nlohmann::json document = nlohmann::json::parse(parsed_document);

    for(auto & name_field: search_schema) {
+        // Go through all the field names and find the keys+values so that they can be removed from in-memory index
        std::vector<std::string> tokens;
        if(name_field.second.type == field_types::STRING) {
            StringUtils::split(document[name_field.first], tokens, " ");
@ -1186,6 +1251,20 @@ Option<std::string> Collection::remove(const std::string & id) {
                encode_int64(value, key);
                tokens.push_back(std::string((char*)key, KEY_LEN));
            }
+        } else if(name_field.second.type == field_types::FLOAT) {
+            const int KEY_LEN = 8;
+            unsigned char key[KEY_LEN];
+            // Read the stored value as a float, exactly as it was read at index time. Reading it as an
+            // integer would truncate e.g. 1.09 to 1 and encode a key that was never inserted, leaving the
+            // real entry behind in the index.
+            float value = document[name_field.first].get<float>();
+            encode_float(value, key);
+            tokens.push_back(std::string((char*)key, KEY_LEN));
+        } else if(name_field.second.type == field_types::FLOAT_ARRAY) {
+            std::vector<float> values = document[name_field.first].get<std::vector<float>>();
+            for(const float value: values) {
+                const int KEY_LEN = 8;
+                unsigned char key[KEY_LEN];
+                encode_float(value, key);
+                tokens.push_back(std::string((char*)key, KEY_LEN));
+            }
        }

        for(auto & token: tokens) {
--- a/src/main/benchmark.cpp
+++ b/src/main/benchmark.cpp
@ -22,14 +22,15 @@ int main(int argc, char* argv[]) {
    CollectionManager & collectionManager = CollectionManager::get_instance();
    collectionManager.init(store, "abcd");

-    Collection *collection = collectionManager.get_collection("collection");
+    Collection *collection = collectionManager.get_collection("hnstories_direct");
    if(collection == nullptr) {
-        collection = collectionManager.create_collection("collection", fields_to_index, {}, sort_fields);
+        collection = collectionManager.create_collection("hnstories_direct", fields_to_index, {}, sort_fields);
    }

-    std::ifstream infile("/Users/kishore/Downloads/hnstories_small.jsonl");
+    std::ifstream infile("/Users/kishore/Downloads/hnstories.jsonl");

    std::string json_line;
+    auto begin = std::chrono::high_resolution_clock::now();

    while (std::getline(infile, json_line)) {
        collection->add(json_line);
@ -38,7 +39,7 @@ int main(int argc, char* argv[]) {
    infile.close();
    cout << "FINISHED INDEXING!" << endl << flush;

-    std::vector<std::string> search_fields = {"title"};
+    /*std::vector<std::string> search_fields = {"title"};

    std::vector<string> queries = {"the", "and", "to", "of", "in"};
    auto counter = 0;
@ -51,10 +52,10 @@ int main(int argc, char* argv[]) {
        auto results = collection->search(queries[i], search_fields, "", { }, {sort_field("points", "DESC")}, 1, 10, 1, MAX_SCORE, 0).get();
        results_total += results.size();
        counter++;
-    }
+    }*/

    long long int timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - begin).count();
    cout << "Time taken: " << timeMillis << "ms" << endl;
-    cout << "Total: " << results_total << endl;
+    //cout << "Total: " << results_total << endl;
    return 0;
 }
--- a/test/art_test.cpp
+++ b/test/art_test.cpp
@ -1138,6 +1138,11 @@ TEST(ArtTest, test_encode_float_positive) {
    ASSERT_EQ(1, results.size());
    results.clear();

+    res = art_float_search(&t, 0.0, GREATER_THAN, results);
+    ASSERT_TRUE(res == 0);
+    ASSERT_EQ(5, results.size());
+    results.clear();
+
    res = art_float_search(&t, 10.5678, LESS_THAN, results);
    ASSERT_TRUE(res == 0);
    ASSERT_EQ(4, results.size());
@ -1153,10 +1158,20 @@ TEST(ArtTest, test_encode_float_positive) {
    ASSERT_EQ(1, results.size());
    results.clear();

+    res = art_float_search(&t, 10.4, GREATER_THAN, results);
+    ASSERT_TRUE(res == 0);
+    ASSERT_EQ(2, results.size());
+    results.clear();
+
    res = art_float_search(&t, 10.5678, GREATER_THAN_EQUALS, results);
    ASSERT_TRUE(res == 0);
    ASSERT_EQ(2, results.size());
    results.clear();
+
+    res = art_float_search(&t, 10, GREATER_THAN_EQUALS, results);
+    ASSERT_TRUE(res == 0);
+    ASSERT_EQ(2, results.size());
+    results.clear();
 }

 TEST(ArtTest, test_encode_float_positive_negative) {
@ -1204,4 +1219,9 @@ TEST(ArtTest, test_encode_float_positive_negative) {
    ASSERT_TRUE(res == 0);
    ASSERT_EQ(6, results.size());
    results.clear();
+
+    res = art_float_search(&t, -24, GREATER_THAN_EQUALS, results);
+    ASSERT_TRUE(res == 0);
+    ASSERT_EQ(5, results.size());
+    results.clear();
 }
--- a/test/collection_test.cpp
+++ b/test/collection_test.cpp
@ -597,6 +597,143 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
    collectionManager.drop_collection("coll_array_fields");
 }

+// Exercises FLOAT and FLOAT_ARRAY fields end to end: sorting by a float field, range filters, IN-style
+// list filters on single and array fields, a filter that matches nothing, and prefix search ordering.
+// Documents come from test/numeric_array_documents.jsonl (ids "0".."4" in file order).
+TEST_F(CollectionTest, FilterOnFloatFields) {
+    Collection *coll_array_fields;
+
+    std::ifstream infile(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
+    std::vector<field> fields = {field("name", field_types::STRING), field("age", field_types::INT32),
+                                 field("top_3", field_types::FLOAT_ARRAY),
+                                 field("rating", field_types::FLOAT)};
+    std::vector<field> sort_fields_index = { field("rating", "FLOAT") };
+    std::vector<sort_field> sort_fields_desc = { sort_field("rating", "DESC") };
+    std::vector<sort_field> sort_fields_asc = { sort_field("rating", "ASC") };
+
+    coll_array_fields = collectionManager.get_collection("coll_array_fields");
+    if(coll_array_fields == nullptr) {
+        coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, sort_fields_index);
+    }
+
+    std::string json_line;
+
+    while (std::getline(infile, json_line)) {
+        coll_array_fields->add(json_line);
+    }
+
+    infile.close();
+
+    // Plain search with no filters - results should be sorted by rating field DESC
+    query_fields = {"name"};
+    std::vector<std::string> facets;
+    nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
+    ASSERT_EQ(5, results["hits"].size());
+
+    std::vector<std::string> ids = {"1", "2", "4", "0", "3"};
+
+    for(size_t i = 0; i < results["hits"].size(); i++) {
+        nlohmann::json result = results["hits"].at(i);
+        std::string result_id = result["id"];
+        std::string id = ids.at(i);
+        ASSERT_STREQ(id.c_str(), result_id.c_str());
+    }
+
+    // Plain search with no filters - results should be sorted by rating field ASC
+    results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields_asc, 0, 10, 1, FREQUENCY, false).get();
+    ASSERT_EQ(5, results["hits"].size());
+
+    ids = {"3", "0", "4", "2", "1"};
+
+    for(size_t i = 0; i < results["hits"].size(); i++) {
+        nlohmann::json result = results["hits"].at(i);
+        std::string result_id = result["id"];
+        std::string id = ids.at(i);
+        ASSERT_STREQ(id.c_str(), result_id.c_str());
+    }
+
+    // Searching on a float field, sorted desc by rating
+    results = coll_array_fields->search("Jeremy", query_fields, "rating:>0.0", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
+    ASSERT_EQ(4, results["hits"].size());
+
+    ids = {"1", "2", "4", "0"};
+
+    for(size_t i = 0; i < results["hits"].size(); i++) {
+        nlohmann::json result = results["hits"].at(i);
+        std::string result_id = result["id"];
+        std::string id = ids.at(i);
+        ASSERT_STREQ(id.c_str(), result_id.c_str());
+    }
+
+    // Searching a float against a float array field
+    results = coll_array_fields->search("Jeremy", query_fields, "top_3:>7.8", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
+    ASSERT_EQ(2, results["hits"].size());
+
+    ids = {"1", "2"};
+    for(size_t i = 0; i < results["hits"].size(); i++) {
+        nlohmann::json result = results["hits"].at(i);
+        std::string result_id = result["id"];
+        std::string id = ids.at(i);
+        ASSERT_STREQ(id.c_str(), result_id.c_str());
+    }
+
+    // multiple filters
+    results = coll_array_fields->search("Jeremy", query_fields, "top_3:>7.8 && rating:>7.9", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
+    ASSERT_EQ(1, results["hits"].size());
+
+    ids = {"1"};
+    for(size_t i = 0; i < results["hits"].size(); i++) {
+        nlohmann::json result = results["hits"].at(i);
+        std::string result_id = result["id"];
+        std::string id = ids.at(i);
+        ASSERT_STREQ(id.c_str(), result_id.c_str());
+    }
+
+    // multiple search values (works like SQL's IN operator) against a single float field
+    results = coll_array_fields->search("Jeremy", query_fields, "rating:[1.09, 7.812]", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
+    ASSERT_EQ(2, results["hits"].size());
+
+    ids = {"2", "0"};
+    for(size_t i = 0; i < results["hits"].size(); i++) {
+        nlohmann::json result = results["hits"].at(i);
+        std::string result_id = result["id"];
+        std::string id = ids.at(i);
+        ASSERT_STREQ(id.c_str(), result_id.c_str());
+    }
+
+    // multiple search values against a float array field - also use extra padding between symbols
+    results = coll_array_fields->search("Jeremy", query_fields, "top_3 : [ 5.431, 0.001 , 7.812, 11.992]", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
+    ASSERT_EQ(3, results["hits"].size());
+
+    ids = {"2", "4", "0"};
+    for(size_t i = 0; i < results["hits"].size(); i++) {
+        nlohmann::json result = results["hits"].at(i);
+        std::string result_id = result["id"];
+        std::string id = ids.at(i);
+        ASSERT_STREQ(id.c_str(), result_id.c_str());
+    }
+
+    // when filters don't match any record, no results should be returned
+    // (keep the Option itself: unwrapping with .get() before the ok() check would make the check vacuous)
+    Option<nlohmann::json> results_op = coll_array_fields->search("Jeremy", query_fields, "rating:<-2.78", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false);
+    ASSERT_TRUE(results_op.ok());
+    results = results_op.get();
+    ASSERT_EQ(0, results["hits"].size());
+
+    // rank tokens by token ranking field
+    results_op = coll_array_fields->search("j", query_fields, "", facets, sort_fields_desc, 0, 10, 1, MAX_SCORE, true);
+    ASSERT_TRUE(results_op.ok());
+    results = results_op.get();
+    ASSERT_EQ(5, results["hits"].size());
+
+    ids = {"1", "2", "4", "0", "3"};
+
+    for(size_t i = 0; i < results["hits"].size(); i++) {
+        nlohmann::json result = results["hits"].at(i);
+        std::string result_id = result["id"];
+        std::string id = ids.at(i);
+        ASSERT_STREQ(id.c_str(), result_id.c_str());
+    }
+
+    collectionManager.drop_collection("coll_array_fields");
+}
+
 TEST_F(CollectionTest, FilterOnTextFields) {
    Collection *coll_array_fields;

--- a/test/numeric_array_documents.jsonl
+++ b/test/numeric_array_documents.jsonl
@ -1,5 +1,5 @@
-{"name": "Jeremy Howard", "age": 24, "years": [2014, 2015, 2016], "timestamps": [1390354022, 1421890022, 1453426022], "tags": ["gold", "silver"]}
-{"name": "Jeremy Howard", "age": 44, "years": [2015, 2016], "timestamps": [1421890022, 1453426022], "tags": ["gold"]}
-{"name": "Jeremy Howard", "age": 21, "years": [2016], "timestamps": [1453426022], "tags": ["bronze", "gold"]}
-{"name": "Jeremy Howard", "age": 63, "years": [1981, 1985], "timestamps": [348974822, 475205222], "tags": ["silver"]}
-{"name": "Jeremy Howard", "age": 32, "years": [1999, 2000, 2001, 2002], "timestamps": [916968422, 948504422, 980126822, 1011662822], "tags": ["silver", "gold", "bronze"]}
+{"name": "Jeremy Howard", "top_3": [1.09, 1.88, 0.001], "rating": 1.09, "age": 24, "years": [2014, 2015, 2016], "timestamps": [1390354022, 1421890022, 1453426022], "tags": ["gold", "silver"]}
+{"name": "Jeremy Howard", "top_3": [9.999, 8.89, 7.713], "rating": 9.999, "age": 44, "years": [2015, 2016], "timestamps": [1421890022, 1453426022], "tags": ["gold"]}
+{"name": "Jeremy Howard", "top_3": [7.812, 7.770, 6.66], "rating": 7.812, "age": 21, "years": [2016], "timestamps": [1453426022], "tags": ["bronze", "gold"]}
+{"name": "Jeremy Howard", "top_3": [0.0, 0.0, 0.0], "rating": 0.0, "age": 63, "years": [1981, 1985], "timestamps": [348974822, 475205222], "tags": ["silver"]}
+{"name": "Jeremy Howard", "top_3": [5.5, 5.431, 1.001], "rating": 5.5, "age": 32, "years": [1999, 2000, 2001, 2002], "timestamps": [916968422, 948504422, 980126822, 1011662822], "tags": ["silver", "gold", "bronze"]}