From 876f0f64dc04cc47dfefaf5498e2348d9ca88ef8 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Fri, 18 Jun 2021 20:14:36 +0530 Subject: [PATCH] Support not equals on boolean fields. --- src/collection.cpp | 26 +++++- src/index.cpp | 28 +++++- test/collection_filtering_test.cpp | 142 ++++++++++++++++++++++++++++- test/collection_test.cpp | 109 ---------------------- 4 files changed, 189 insertions(+), 116 deletions(-) diff --git a/src/collection.cpp b/src/collection.cpp index cb2ee20a..23b4bcb3 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -2156,6 +2156,27 @@ Option Collection::parse_filter_query(const std::string& simple_filter_que } } } else if(_field.is_bool()) { + NUM_COMPARATOR bool_comparator = EQUALS; + size_t filter_value_index = 0; + + if(raw_value[0] == '=') { + bool_comparator = EQUALS; + while(++filter_value_index < raw_value.size() && raw_value[filter_value_index] == ' '); + } else if(raw_value.size() >= 2 && raw_value[0] == '!' && raw_value[1] == '=') { + bool_comparator = NOT_EQUALS; + filter_value_index++; + while(++filter_value_index < raw_value.size() && raw_value[filter_value_index] == ' '); + } + + if(filter_value_index != 0) { + raw_value = raw_value.substr(filter_value_index); + } + + if(filter_value_index == raw_value.size()) { + return Option(400, "Error with filter field `" + _field.name + + "`: Filter value cannot be empty."); + } + if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') { std::vector filter_values; StringUtils::split(raw_value.substr(1, raw_value.size() - 2), filter_values, ","); @@ -2169,14 +2190,15 @@ Option Collection::parse_filter_query(const std::string& simple_filter_que filter_value = (filter_value == "true") ? 
"1" : "0"; f.values.push_back(filter_value); - f.comparators.push_back(EQUALS); + f.comparators.push_back(bool_comparator); } } else { if(raw_value != "true" && raw_value != "false") { return Option(400, "Value of filter field `" + _field.name + "` must be `true` or `false`."); } + std::string bool_value = (raw_value == "true") ? "1" : "0"; - f = {field_name, {bool_value}, {EQUALS}}; + f = {field_name, {bool_value}, {bool_comparator}}; } } else if(_field.is_geopoint()) { diff --git a/src/index.cpp b/src/index.cpp index bdf4c875..8f5e2bf6 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -1088,7 +1088,33 @@ uint32_t Index::do_filtering(uint32_t** filter_ids_out, const std::vectorsearch(a_filter.comparators[value_index], bool_int64, &result_ids, result_ids_len); + if(a_filter.comparators[value_index] == NOT_EQUALS) { + uint32_t* to_exclude_ids = nullptr; + size_t to_exclude_ids_len = 0; + num_tree->search(EQUALS, bool_int64, &to_exclude_ids, to_exclude_ids_len); + + auto all_ids = seq_ids.uncompress(); + auto all_ids_size = seq_ids.getLength(); + + uint32_t* excluded_ids = nullptr; + size_t excluded_ids_len = 0; + + excluded_ids_len = ArrayUtils::exclude_scalar(all_ids, all_ids_size, to_exclude_ids, + to_exclude_ids_len, &excluded_ids); + + delete [] all_ids; + delete [] to_exclude_ids; + + uint32_t *out = nullptr; + result_ids_len = ArrayUtils::or_scalar(result_ids, result_ids_len, + excluded_ids, excluded_ids_len, &out); + delete [] result_ids; + result_ids = out; + delete [] excluded_ids; + } else { + num_tree->search(a_filter.comparators[value_index], bool_int64, &result_ids, result_ids_len); + } + value_index++; } diff --git a/test/collection_filtering_test.cpp b/test/collection_filtering_test.cpp index d81ae59a..5ad85c4a 100644 --- a/test/collection_filtering_test.cpp +++ b/test/collection_filtering_test.cpp @@ -390,10 +390,10 @@ TEST_F(CollectionFilteringTest, FilterAndQueryFieldRestrictions) { std::ifstream 
infile(std::string(ROOT_DIR)+"test/multi_field_documents.jsonl"); std::vector fields = { - field("title", field_types::STRING, false), - field("starring", field_types::STRING, false), - field("cast", field_types::STRING_ARRAY, true), - field("points", field_types::INT32, false) + field("title", field_types::STRING, false), + field("starring", field_types::STRING, false), + field("cast", field_types::STRING_ARRAY, true), + field("points", field_types::INT32, false) }; coll_mul_fields = collectionManager.get_collection("coll_mul_fields").get(); @@ -1464,3 +1464,137 @@ TEST_F(CollectionFilteringTest, NumericalRangeFilter) { collectionManager.drop_collection("coll1"); } + +TEST_F(CollectionFilteringTest, QueryBoolFields) { + Collection *coll_bool; + + std::ifstream infile(std::string(ROOT_DIR)+"test/bool_documents.jsonl"); + std::vector fields = { + field("popular", field_types::BOOL, false), + field("title", field_types::STRING, false), + field("rating", field_types::FLOAT, false), + field("bool_array", field_types::BOOL_ARRAY, false), + }; + + std::vector sort_fields = { sort_by("popular", "DESC"), sort_by("rating", "DESC") }; + + coll_bool = collectionManager.get_collection("coll_bool").get(); + if(coll_bool == nullptr) { + coll_bool = collectionManager.create_collection("coll_bool", 1, fields, "rating").get(); + } + + std::string json_line; + + while (std::getline(infile, json_line)) { + coll_bool->add(json_line); + } + + infile.close(); + + // Plain search with no filters - results should be sorted correctly + query_fields = {"title"}; + std::vector facets; + nlohmann::json results = coll_bool->search("the", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); + ASSERT_EQ(5, results["hits"].size()); + + std::vector ids = {"1", "3", "4", "9", "2"}; + + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = result["document"]["id"]; + std::string id = ids.at(i); + 
ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + // Searching on a bool field + results = coll_bool->search("the", query_fields, "popular:true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); + ASSERT_EQ(3, results["hits"].size()); + + ids = {"1", "3", "4"}; + + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = result["document"]["id"]; + std::string id = ids.at(i); + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + // alternative `:=` syntax + results = coll_bool->search("the", query_fields, "popular:=true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); + ASSERT_EQ(3, results["hits"].size()); + + results = coll_bool->search("the", query_fields, "popular:false", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); + ASSERT_EQ(2, results["hits"].size()); + + results = coll_bool->search("the", query_fields, "popular:= false", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); + ASSERT_EQ(2, results["hits"].size()); + + ids = {"9", "2"}; + + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = result["document"]["id"]; + std::string id = ids.at(i); + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + // searching against a bool array field + + // should be able to filter with an array of boolean values + Option res_op = coll_bool->search("the", query_fields, "bool_array:[true, false]", facets, + sort_fields, {0}, 10, 1, FREQUENCY, {false}); + ASSERT_TRUE(res_op.ok()); + results = res_op.get(); + + ASSERT_EQ(5, results["hits"].size()); + + results = coll_bool->search("the", query_fields, "bool_array: true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); + ASSERT_EQ(4, results["hits"].size()); + ids = {"1", "4", "9", "2"}; + + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = 
result["document"]["id"]; + std::string id = ids.at(i); + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + // should be able to search using array with a single element boolean value + + results = coll_bool->search("the", query_fields, "bool_array:[true]", facets, + sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); + + ASSERT_EQ(4, results["hits"].size()); + + for(size_t i = 0; i < results["hits"].size(); i++) { + nlohmann::json result = results["hits"].at(i); + std::string result_id = result["document"]["id"]; + std::string id = ids.at(i); + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } + + // not equals on bool field + + results = coll_bool->search("the", query_fields, "popular:!= true", facets, + sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); + + ASSERT_EQ(2, results["hits"].size()); + ASSERT_EQ("9", results["hits"][0]["document"]["id"].get()); + ASSERT_EQ("2", results["hits"][1]["document"]["id"].get()); + + // not equals on bool array field + results = coll_bool->search("the", query_fields, "bool_array:!= [true]", facets, + sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); + + ASSERT_EQ(1, results["hits"].size()); + ASSERT_EQ("3", results["hits"][0]["document"]["id"].get()); + + // empty filter value + res_op = coll_bool->search("the", query_fields, "bool_array:=", facets, + sort_fields, {0}, 10, 1, FREQUENCY, {false}); + + ASSERT_FALSE(res_op.ok()); + ASSERT_EQ("Error with filter field `bool_array`: Filter value cannot be empty.", res_op.error()); + + collectionManager.drop_collection("coll_bool"); +} diff --git a/test/collection_test.cpp b/test/collection_test.cpp index fb87efab..ddd9480a 100644 --- a/test/collection_test.cpp +++ b/test/collection_test.cpp @@ -1404,115 +1404,6 @@ TEST_F(CollectionTest, ImportDocuments) { collectionManager.drop_collection("coll_mul_fields"); } -TEST_F(CollectionTest, QueryBoolFields) { - Collection *coll_bool; - - std::ifstream infile(std::string(ROOT_DIR)+"test/bool_documents.jsonl"); - std::vector fields = { 
- field("popular", field_types::BOOL, false), - field("title", field_types::STRING, false), - field("rating", field_types::FLOAT, false), - field("bool_array", field_types::BOOL_ARRAY, false), - }; - - std::vector sort_fields = { sort_by("popular", "DESC"), sort_by("rating", "DESC") }; - - coll_bool = collectionManager.get_collection("coll_bool").get(); - if(coll_bool == nullptr) { - coll_bool = collectionManager.create_collection("coll_bool", 4, fields, "rating").get(); - } - - std::string json_line; - - while (std::getline(infile, json_line)) { - coll_bool->add(json_line); - } - - infile.close(); - - // Plain search with no filters - results should be sorted correctly - query_fields = {"title"}; - std::vector facets; - nlohmann::json results = coll_bool->search("the", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); - ASSERT_EQ(5, results["hits"].size()); - - std::vector ids = {"1", "3", "4", "9", "2"}; - - for(size_t i = 0; i < results["hits"].size(); i++) { - nlohmann::json result = results["hits"].at(i); - std::string result_id = result["document"]["id"]; - std::string id = ids.at(i); - ASSERT_STREQ(id.c_str(), result_id.c_str()); - } - - // Searching on a bool field - results = coll_bool->search("the", query_fields, "popular:true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); - ASSERT_EQ(3, results["hits"].size()); - - ids = {"1", "3", "4"}; - - for(size_t i = 0; i < results["hits"].size(); i++) { - nlohmann::json result = results["hits"].at(i); - std::string result_id = result["document"]["id"]; - std::string id = ids.at(i); - ASSERT_STREQ(id.c_str(), result_id.c_str()); - } - - // alternative `:=` syntax - results = coll_bool->search("the", query_fields, "popular:=true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); - ASSERT_EQ(3, results["hits"].size()); - - results = coll_bool->search("the", query_fields, "popular:false", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); - ASSERT_EQ(2, 
results["hits"].size()); - - ids = {"9", "2"}; - - for(size_t i = 0; i < results["hits"].size(); i++) { - nlohmann::json result = results["hits"].at(i); - std::string result_id = result["document"]["id"]; - std::string id = ids.at(i); - ASSERT_STREQ(id.c_str(), result_id.c_str()); - } - - // searching against a bool array field - - // should be able to filter with an array of boolean values - Option res_op = coll_bool->search("the", query_fields, "bool_array:[true, false]", facets, - sort_fields, {0}, 10, 1, FREQUENCY, {false}); - ASSERT_TRUE(res_op.ok()); - results = res_op.get(); - - ASSERT_EQ(5, results["hits"].size()); - - results = coll_bool->search("the", query_fields, "bool_array: true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); - ASSERT_EQ(4, results["hits"].size()); - ids = {"1", "4", "9", "2"}; - - for(size_t i = 0; i < results["hits"].size(); i++) { - nlohmann::json result = results["hits"].at(i); - std::string result_id = result["document"]["id"]; - std::string id = ids.at(i); - ASSERT_STREQ(id.c_str(), result_id.c_str()); - } - - // should be able to search using array with a single element boolean value - - auto res = coll_bool->search("the", query_fields, "bool_array:[true]", facets, - sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); - - results = coll_bool->search("the", query_fields, "bool_array: true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); - ASSERT_EQ(4, results["hits"].size()); - - for(size_t i = 0; i < results["hits"].size(); i++) { - nlohmann::json result = results["hits"].at(i); - std::string result_id = result["document"]["id"]; - std::string id = ids.at(i); - ASSERT_STREQ(id.c_str(), result_id.c_str()); - } - - collectionManager.drop_collection("coll_bool"); -} - TEST_F(CollectionTest, SearchingWithMissingFields) { // return error without crashing when searching for fields that do not conform to the schema Collection *coll_array_fields;