diff --git a/TODO.md b/TODO.md index d053db38..2143defa 100644 --- a/TODO.md +++ b/TODO.md @@ -48,18 +48,26 @@ - ~~Fetch an individual document~~ - ~~ID field should be a string: must validate~~ - ~~Number of records in collection~~ +- ~~Test for asc/desc upper/lower casing~~ +- ~~Test for search without any sort_by given~~ +- ~~Test for collection creation validation~~ +- ~~Test for delete document~~ +- ~~art float search~~ +- When prefix=true, use token_ranking_field for token ordering only for last word +- only last token should be prefix searched +- test for token ranking on float field +- test for float int field deletion during doc deletion +- Prefix-search strings should not be null terminated +- > INT32_MAX validation for float field +- art bool support +- Proper logging - Add docs/explanation around ranking calc - Use rocksdb batch put for atomic insertion -- When prefix=true, use token_ranking_field for token ordering only for last word - Query token ids should match query token ordering - ID should not have "/" - Group results by field - Handle store-get() not finding a key - Delete using range: https://github.com/facebook/rocksdb/wiki/Delete-A-Range-Of-Keys -- ~~Test for asc/desc upper/lower casing~~ -- ~~Test for search without any sort_by given~~ -- ~~Test for collection creation validation~~ -- ~~Test for delete document~~ - Test for sorted_array::indexOf when length is 0 - Test for snippets - Test for pagination @@ -70,18 +78,12 @@ - UTF-8 support for fuzzy search - Handle searching for non-existing fields gracefully - test for same match score but different primary, secondary attr -- only last token should be prefix searched -- Intersection without unpacking - Support nested fields via "." - Support search operators like +, - etc. -- Prefix-search strings should not be null terminated - string_utils::tokenize should not have max length -- art float search -- Benchmark with -ffast-math - Space sensitivity -- Use bitmap index instead of compressed array for doc list -- Primary_rank_scores and secondary_rank_scores hashmaps should be combined -- Proper logging +- Use bitmap index instead of compressed array for doc list? +- Primary_rank_scores and secondary_rank_scores hashmaps should be combined? - d-ary heap? **API** @@ -105,4 +107,5 @@ **Tech debt** -- ~~Use GLOB file pattern for CMake (better IDE refactoring support)~~ \ No newline at end of file +- ~~Use GLOB file pattern for CMake (better IDE refactoring support)~~ +- DRY index_int64_field* methods \ No newline at end of file diff --git a/include/collection.h b/include/collection.h index 8a9a24e3..488013cf 100644 --- a/include/collection.h +++ b/include/collection.h @@ -103,14 +103,18 @@ private: void index_string_array_field(const std::vector & strings, const uint32_t score, art_tree *t, uint32_t seq_id, const bool verbatim) const; - void index_int32_field(const int32_t value, uint32_t score, art_tree *t, uint32_t seq_id) const; + void index_int32_field(const int32_t value, const uint32_t score, art_tree *t, uint32_t seq_id) const; - void index_int64_field(const int64_t value, uint32_t score, art_tree *t, uint32_t seq_id) const; + void index_int64_field(const int64_t value, const uint32_t score, art_tree *t, uint32_t seq_id) const; + + void index_float_field(const float value, const uint32_t score, art_tree *t, uint32_t seq_id) const; void index_int32_array_field(const std::vector & values, const uint32_t score, art_tree *t, uint32_t seq_id) const; void index_int64_array_field(const std::vector & values, const uint32_t score, art_tree *t, uint32_t seq_id) const; + void index_float_array_field(const std::vector & values, const uint32_t score, art_tree *t, uint32_t seq_id) const; + void remove_and_shift_offset_index(sorted_array &offset_index, const uint32_t *indices_sorted, const uint32_t indices_length); diff --git a/include/field.h b/include/field.h index 07193a33..0b0e4bd3 100644 --- a/include/field.h +++ b/include/field.h @@ -9,6 +9,8 @@ namespace field_types { static const std::string STRING = "STRING"; static const std::string INT32 = "INT32"; static const std::string INT64 = "INT64"; + static const std::string FLOAT = "FLOAT"; + static const std::string FLOAT_ARRAY = "FLOAT_ARRAY"; static const std::string STRING_ARRAY = "STRING_ARRAY"; static const std::string INT32_ARRAY = "INT32_ARRAY"; static const std::string INT64_ARRAY = "INT64_ARRAY"; @@ -27,9 +29,17 @@ struct field { } - bool integer() { - return type == field_types::INT32 || type == field_types::INT32_ARRAY || - type == field_types::INT64 || type == field_types::INT64_ARRAY; + bool is_integer() { + return (type == field_types::INT32 || type == field_types::INT32_ARRAY || + type == field_types::INT64 || type == field_types::INT64_ARRAY); + } + + bool is_float() { + return (type == field_types::FLOAT || type == field_types::FLOAT_ARRAY); + } + + bool is_string() { + return (type == field_types::STRING || type == field_types::STRING_ARRAY); } }; diff --git a/include/string_utils.h b/include/string_utils.h index 88998d66..ffd9f236 100644 --- a/include/string_utils.h +++ b/include/string_utils.h @@ -72,6 +72,28 @@ struct StringUtils { return escaped.str(); } + // See: https://stackoverflow.com/a/19751887/131050 + static bool is_float(const std::string &s) { + std::string::const_iterator it = s.begin(); + bool decimalPoint = false; + int minSize = 0; + if(s.size() > 0 && (s[0] == '-' || s[0] == '+')) { + it++; + minSize++; + } + + while(it != s.end()){ + if(*it == '.') { + if(!decimalPoint) decimalPoint = true; + else break; + } else if(!std::isdigit(*it) && ((*it!='f') || it+1 != s.end() || !decimalPoint)) { + break; + } + ++it; + } + return s.size() > minSize && it == s.end(); + } + // Adapted from: http://stackoverflow.com/a/2845275/131050 static bool is_integer(const std::string &s) { if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) { diff --git a/src/collection.cpp b/src/collection.cpp index 585e734d..a92320a4 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -85,7 +85,7 @@ Option Collection::index_in_memory(const nlohmann::json &document, uin } if(!token_ranking_field.empty() && !document[token_ranking_field].is_number()) { - return Option<>(400, "Token ranking field `" + token_ranking_field + "` must be an INT32."); + return Option<>(400, "Token ranking field `" + token_ranking_field + "` must be a number."); } if(!token_ranking_field.empty() && document[token_ranking_field].get() > INT32_MAX) { @@ -114,7 +114,7 @@ Option Collection::index_in_memory(const nlohmann::json &document, uin const std::string & text = document[field_name]; index_string_field(text, points, t, seq_id, false); } else if(field_pair.second.type == field_types::INT32) { - if(!document[field_name].is_number()) { + if(!document[field_name].is_number_integer()) { return Option<>(400, "Search field `" + field_name + "` must be an INT32."); } @@ -125,12 +125,19 @@ Option Collection::index_in_memory(const nlohmann::json &document, uin uint32_t value = document[field_name]; index_int32_field(value, points, t, seq_id); } else if(field_pair.second.type == field_types::INT64) { - if(!document[field_name].is_number()) { + if(!document[field_name].is_number_integer()) { return Option<>(400, "Search field `" + field_name + "` must be an INT64."); } uint64_t value = document[field_name]; index_int64_field(value, points, t, seq_id); + } else if(field_pair.second.type == field_types::FLOAT) { + if(!document[field_name].is_number_float()) { + return Option<>(400, "Search field `" + field_name + "` must be a FLOAT."); + } + + float value = document[field_name]; + index_float_field(value, points, t, seq_id); } else if(field_pair.second.type == field_types::STRING_ARRAY) { if(!document[field_name].is_array()) { return Option<>(400, "Search field `" + field_name + "` must be a STRING_ARRAY."); @@ -147,7 +154,7 @@ Option Collection::index_in_memory(const nlohmann::json &document, uin return Option<>(400, "Search field `" + field_name + "` must be an INT32_ARRAY."); } - if(document[field_name].size() > 0 && !document[field_name][0].is_number()) { + if(document[field_name].size() > 0 && !document[field_name][0].is_number_integer()) { return Option<>(400, "Search field `" + field_name + "` must be an INT32_ARRAY."); } @@ -158,12 +165,23 @@ Option Collection::index_in_memory(const nlohmann::json &document, uin return Option<>(400, "Search field `" + field_name + "` must be an INT64_ARRAY."); } - if(document[field_name].size() > 0 && !document[field_name][0].is_number()) { + if(document[field_name].size() > 0 && !document[field_name][0].is_number_integer()) { return Option<>(400, "Search field `" + field_name + "` must be an INT64_ARRAY."); } std::vector values = document[field_name]; index_int64_array_field(values, points, t, seq_id); + } else if(field_pair.second.type == field_types::FLOAT_ARRAY) { + if(!document[field_name].is_array()) { + return Option<>(400, "Search field `" + field_name + "` must be an FLOAT_ARRAY."); + } + + if(document[field_name].size() > 0 && !document[field_name][0].is_number_float()) { + return Option<>(400, "Search field `" + field_name + "` must be an FLOAT_ARRAY."); + } + + std::vector values = document[field_name]; + index_float_array_field(values, points, t, seq_id); } } @@ -260,6 +278,30 @@ void Collection::index_int64_field(const int64_t value, uint32_t score, art_tree art_insert(t, key, KEY_LEN, &art_doc, num_hits); } +void Collection::index_float_field(const float value, uint32_t score, art_tree *t, uint32_t seq_id) const { + const int KEY_LEN = 8; + unsigned char key[KEY_LEN]; + + encode_float(value, key); + + uint32_t num_hits = 0; + art_leaf* leaf = (art_leaf *) art_search(t, key, KEY_LEN); + if(leaf != NULL) { + num_hits = leaf->values->ids.getLength(); + } + + num_hits += 1; + + art_document art_doc; + art_doc.id = seq_id; + art_doc.score = score; + art_doc.offsets_len = 0; + art_doc.offsets = nullptr; + + art_insert(t, key, KEY_LEN, &art_doc, num_hits); +} + + void Collection::index_string_field(const std::string & text, const uint32_t score, art_tree *t, uint32_t seq_id, const bool verbatim) const { std::vector tokens; @@ -327,6 +369,13 @@ void Collection::index_int64_array_field(const std::vector & values, co } } +void Collection::index_float_array_field(const std::vector & values, const float score, art_tree *t, + uint32_t seq_id) const { + for(const float value: values) { + index_float_field(value, score, t, seq_id); + } +} + void Collection::do_facets(std::vector & facets, uint32_t* result_ids, size_t results_size) { for(auto & a_facet: facets) { // assumed that facet fields have already been validated upstream @@ -466,16 +515,20 @@ Option Collection::do_filtering(uint32_t** filter_ids_out, const std:: const std::string & raw_value = expression_parts[1]; filter f; - if(_field.integer()) { + if(_field.is_integer() || _field.is_float()) { // could be a single value or a list if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') { std::vector filter_values; StringUtils::split(raw_value.substr(1, raw_value.size() - 2), filter_values, ","); for(const std::string & filter_value: filter_values) { - if(!StringUtils::is_integer(filter_value)) { + if(_field.is_integer() && !StringUtils::is_integer(filter_value)) { return Option<>(400, "Error with field `" + _field.name + "`: Not an integer."); } + + if(_field.is_float() && !StringUtils::is_float(filter_value)) { + return Option<>(400, "Error with field `" + _field.name + "`: Not a float."); + } } f = {field_name, filter_values, EQUALS}; @@ -498,13 +551,17 @@ Option Collection::do_filtering(uint32_t** filter_ids_out, const std:: filter_value = StringUtils::trim(filter_value); - if(!StringUtils::is_integer(filter_value)) { + if(_field.is_integer() && !StringUtils::is_integer(filter_value)) { return Option<>(400, "Error with field `" + _field.name + "`: Not an integer."); } + if(_field.is_float() && !StringUtils::is_float(filter_value)) { + return Option<>(400, "Error with field `" + _field.name + "`: Not a float."); + } + f = {field_name, {filter_value}, op_comparator.get()}; } - } else { + } else if(_field.is_string()) { if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') { std::vector filter_values; StringUtils::split(raw_value.substr(1, raw_value.size() - 2), filter_values, ","); @@ -512,6 +569,8 @@ Option Collection::do_filtering(uint32_t** filter_ids_out, const std:: } else { f = {field_name, {raw_value}, EQUALS}; } + } else { + return Option<>(400, "Error with field `" + _field.name + "`: Unidentified field type."); } filters.push_back(f); @@ -527,7 +586,7 @@ Option Collection::do_filtering(uint32_t** filter_ids_out, const std:: field f = search_schema.at(a_filter.field_name); std::vector leaves; - if(f.integer()) { + if(f.is_integer()) { for(const std::string & filter_value: a_filter.values) { if(f.type == field_types::INT32 || f.type == field_types::INT32_ARRAY) { int32_t value = (int32_t) std::stoi(filter_value); @@ -537,7 +596,12 @@ Option Collection::do_filtering(uint32_t** filter_ids_out, const std:: art_int64_search(t, value, a_filter.compare_operator, leaves); } } - } else if(f.type == field_types::STRING || f.type == field_types::STRING_ARRAY) { + } else if(f.is_float()) { + for(const std::string & filter_value: a_filter.values) { + float value = (float) std::atof(filter_value.c_str()); + art_float_search(t, value, a_filter.compare_operator, leaves); + } + } else if(f.is_string()) { for(const std::string & filter_value: a_filter.values) { art_leaf* leaf = (art_leaf *) art_search(t, (const unsigned char*) filter_value.c_str(), filter_value.length()+1); if(leaf != nullptr) { @@ -1153,6 +1217,7 @@ Option Collection::remove(const std::string & id) { nlohmann::json document = nlohmann::json::parse(parsed_document); for(auto & name_field: search_schema) { + // Go through all the field names and find the keys+values so that they can be removed from in-memory index std::vector tokens; if(name_field.second.type == field_types::STRING) { StringUtils::split(document[name_field.first], tokens, " "); @@ -1186,6 +1251,20 @@ Option Collection::remove(const std::string & id) { encode_int64(value, key); tokens.push_back(std::string((char*)key, KEY_LEN)); } + } else if(name_field.second.type == field_types::FLOAT) { + const int KEY_LEN = 8; + unsigned char key[KEY_LEN]; + int64_t value = document[name_field.first].get(); + encode_float(value, key); + tokens.push_back(std::string((char*)key, KEY_LEN)); + } else if(name_field.second.type == field_types::FLOAT_ARRAY) { + std::vector values = document[name_field.first].get>(); + for(const float value: values) { + const int KEY_LEN = 8; + unsigned char key[KEY_LEN]; + encode_float(value, key); + tokens.push_back(std::string((char*)key, KEY_LEN)); + } } for(auto & token: tokens) { diff --git a/src/main/benchmark.cpp b/src/main/benchmark.cpp index 4076d68e..0be93f3d 100644 --- a/src/main/benchmark.cpp +++ b/src/main/benchmark.cpp @@ -22,14 +22,15 @@ int main(int argc, char* argv[]) { CollectionManager & collectionManager = CollectionManager::get_instance(); collectionManager.init(store, "abcd"); - Collection *collection = collectionManager.get_collection("collection"); + Collection *collection = collectionManager.get_collection("hnstories_direct"); if(collection == nullptr) { - collection = collectionManager.create_collection("collection", fields_to_index, {}, sort_fields); + collection = collectionManager.create_collection("hnstories_direct", fields_to_index, {}, sort_fields); } - std::ifstream infile("/Users/kishore/Downloads/hnstories_small.jsonl"); + std::ifstream infile("/Users/kishore/Downloads/hnstories.jsonl"); std::string json_line; + auto begin = std::chrono::high_resolution_clock::now(); while (std::getline(infile, json_line)) { collection->add(json_line); @@ -38,7 +39,7 @@ int main(int argc, char* argv[]) { infile.close(); cout << "FINISHED INDEXING!" << endl << flush; - std::vector search_fields = {"title"}; + /*std::vector search_fields = {"title"}; std::vector queries = {"the", "and", "to", "of", "in"}; auto counter = 0; @@ -51,10 +52,10 @@ int main(int argc, char* argv[]) { auto results = collection->search(queries[i], search_fields, "", { }, {sort_field("points", "DESC")}, 1, 10, 1, MAX_SCORE, 0).get(); results_total += results.size(); counter++; - } + }*/ long long int timeMillis = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - begin).count(); cout << "Time taken: " << timeMillis << "ms" << endl; - cout << "Total: " << results_total << endl; + //cout << "Total: " << results_total << endl; return 0; } \ No newline at end of file diff --git a/test/art_test.cpp b/test/art_test.cpp index 0932f37a..b62b065c 100644 --- a/test/art_test.cpp +++ b/test/art_test.cpp @@ -1138,6 +1138,11 @@ TEST(ArtTest, test_encode_float_positive) { ASSERT_EQ(1, results.size()); results.clear(); + res = art_float_search(&t, 0.0, GREATER_THAN, results); + ASSERT_TRUE(res == 0); + ASSERT_EQ(5, results.size()); + results.clear(); + res = art_float_search(&t, 10.5678, LESS_THAN, results); ASSERT_TRUE(res == 0); ASSERT_EQ(4, results.size()); @@ -1153,10 +1158,20 @@ TEST(ArtTest, test_encode_float_positive) { ASSERT_EQ(1, results.size()); results.clear(); + res = art_float_search(&t, 10.4, GREATER_THAN, results); + ASSERT_TRUE(res == 0); + ASSERT_EQ(2, results.size()); + results.clear(); + res = art_float_search(&t, 10.5678, GREATER_THAN_EQUALS, results); ASSERT_TRUE(res == 0); ASSERT_EQ(2, results.size()); results.clear(); + + res = art_float_search(&t, 10, GREATER_THAN_EQUALS, results); + ASSERT_TRUE(res == 0); + ASSERT_EQ(2, results.size()); + results.clear(); } TEST(ArtTest, test_encode_float_positive_negative) { @@ -1204,4 +1219,9 @@ TEST(ArtTest, test_encode_float_positive_negative) { ASSERT_TRUE(res == 0); ASSERT_EQ(6, results.size()); results.clear(); + + res = art_float_search(&t, -24, GREATER_THAN_EQUALS, results); + ASSERT_TRUE(res == 0); + ASSERT_EQ(5, results.size()); + results.clear(); } \ No newline at end of file diff --git a/test/collection_test.cpp b/test/collection_test.cpp index 14159e97..023ea17d 100644 --- a/test/collection_test.cpp +++ b/test/collection_test.cpp @@ -597,6 +597,143 @@ TEST_F(CollectionTest, FilterOnNumericFields) { collectionManager.drop_collection("coll_array_fields"); } +TEST_F(CollectionTest, FilterOnFloatFields) { + Collection *coll_array_fields; + + std::ifstream infile(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl"); + std::vector fields = {field("name", field_types::STRING), field("age", field_types::INT32), + field("top_3", field_types::FLOAT_ARRAY), + field("rating", field_types::FLOAT)}; + std::vector sort_fields_index = { field("rating", "FLOAT") }; + std::vector sort_fields_desc = { sort_field("rating", "DESC") }; + std::vector sort_fields_asc = { sort_field("rating", "ASC") }; + + coll_array_fields = collectionManager.get_collection("coll_array_fields"); + if(coll_array_fields == nullptr) { + coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, sort_fields_index); + } + + std::string json_line; + + while (std::getline(infile, json_line)) { + coll_array_fields->add(json_line); + } + + infile.close(); + + // Plain search with no filters - results should be sorted by rating field DESC + query_fields = {"name"}; + std::vector facets; + nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get(); + ASSERT_EQ(5, results["hits"].size()); + + std::vector ids = {"1", "2", "4", "0", "3"}; + + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = result["id"]; + std::string id = ids.at(i); + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + // Plain search with no filters - results should be sorted by rating field ASC + results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields_asc, 0, 10, 1, FREQUENCY, false).get(); + ASSERT_EQ(5, results["hits"].size()); + + ids = {"3", "0", "4", "2", "1"}; + + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = result["id"]; + std::string id = ids.at(i); + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + // Searching on a float field, sorted desc by rating + results = coll_array_fields->search("Jeremy", query_fields, "rating:>0.0", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get(); + ASSERT_EQ(4, results["hits"].size()); + + ids = {"1", "2", "4", "0"}; + + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = result["id"]; + std::string id = ids.at(i); + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + // Searching a float against an float array field + results = coll_array_fields->search("Jeremy", query_fields, "top_3:>7.8", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get(); + ASSERT_EQ(2, results["hits"].size()); + + ids = {"1", "2"}; + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = result["id"]; + std::string id = ids.at(i); + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + // multiple filters + results = coll_array_fields->search("Jeremy", query_fields, "top_3:>7.8 && rating:>7.9", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get(); + ASSERT_EQ(1, results["hits"].size()); + + ids = {"1"}; + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = result["id"]; + std::string id = ids.at(i); + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + // multiple search values (works like SQL's IN operator) against a single float field + results = coll_array_fields->search("Jeremy", query_fields, "rating:[1.09, 7.812]", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get(); + ASSERT_EQ(2, results["hits"].size()); + + ids = {"2", "0"}; + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = result["id"]; + std::string id = ids.at(i); + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + // multiple search values against a float array field - also use extra padding between symbols + results = coll_array_fields->search("Jeremy", query_fields, "top_3 : [ 5.431, 0.001 , 7.812, 11.992]", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get(); + ASSERT_EQ(3, results["hits"].size()); + + ids = {"2", "4", "0"}; + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = result["id"]; + std::string id = ids.at(i); + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + // when filters don't match any record, no results should be returned + Option results_op = coll_array_fields->search("Jeremy", query_fields, "rating:<-2.78", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get(); + ASSERT_TRUE(results_op.ok()); + results = results_op.get(); + ASSERT_EQ(0, results["hits"].size()); + + // rank tokens by token ranking field + results_op = coll_array_fields->search("j", query_fields, "", facets, sort_fields_desc, 0, 10, 1, MAX_SCORE, true).get(); + ASSERT_TRUE(results_op.ok()); + results = results_op.get(); + ASSERT_EQ(5, results["hits"].size()); + + ids = {"1", "2", "4", "0", "3"}; + + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = result["id"]; + std::string id = ids.at(i); + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + collectionManager.drop_collection("coll_array_fields"); +} + TEST_F(CollectionTest, FilterOnTextFields) { Collection *coll_array_fields; diff --git a/test/numeric_array_documents.jsonl b/test/numeric_array_documents.jsonl index 75c02c8e..6c62ef00 100644 --- a/test/numeric_array_documents.jsonl +++ b/test/numeric_array_documents.jsonl @@ -1,5 +1,5 @@ -{"name": "Jeremy Howard", "age": 24, "years": [2014, 2015, 2016], "timestamps": [1390354022, 1421890022, 1453426022], "tags": ["gold", "silver"]} -{"name": "Jeremy Howard", "age": 44, "years": [2015, 2016], "timestamps": [1421890022, 1453426022], "tags": ["gold"]} -{"name": "Jeremy Howard", "age": 21, "years": [2016], "timestamps": [1453426022], "tags": ["bronze", "gold"]} -{"name": "Jeremy Howard", "age": 63, "years": [1981, 1985], "timestamps": [348974822, 475205222], "tags": ["silver"]} -{"name": "Jeremy Howard", "age": 32, "years": [1999, 2000, 2001, 2002], "timestamps": [916968422, 948504422, 980126822, 1011662822], "tags": ["silver", "gold", "bronze"]} \ No newline at end of file +{"name": "Jeremy Howard", "top_3": [1.09, 1.88, 0.001], "rating": 1.09, "age": 24, "years": [2014, 2015, 2016], "timestamps": [1390354022, 1421890022, 1453426022], "tags": ["gold", "silver"]} +{"name": "Jeremy Howard", "top_3": [9.999, 8.89, 7.713], "rating": 9.999, "age": 44, "years": [2015, 2016], "timestamps": [1421890022, 1453426022], "tags": ["gold"]} +{"name": "Jeremy Howard", "top_3": [7.812, 7.770, 6.66], "rating": 7.812, "age": 21, "years": [2016], "timestamps": [1453426022], "tags": ["bronze", "gold"]} +{"name": "Jeremy Howard", "top_3": [0.0, 0.0, 0.0], "rating": 0.0, "age": 63, "years": [1981, 1985], "timestamps": [348974822, 475205222], "tags": ["silver"]} +{"name": "Jeremy Howard", "top_3": [5.5, 5.431, 1.001], "rating": 5.5, "age": 32, "years": [1999, 2000, 2001, 2002], "timestamps": [916968422, 948504422, 980126822, 1011662822], "tags": ["silver", "gold", "bronze"]} \ No newline at end of file