Support not equals on boolean fields.

This commit is contained in:
Kishore Nallan 2021-06-18 20:14:36 +05:30
parent ba9a5e65d1
commit 876f0f64dc
4 changed files with 189 additions and 116 deletions

View File

@ -2156,6 +2156,27 @@ Option<bool> Collection::parse_filter_query(const std::string& simple_filter_que
}
}
} else if(_field.is_bool()) {
NUM_COMPARATOR bool_comparator = EQUALS;
size_t filter_value_index = 0;
if(raw_value[0] == '=') {
bool_comparator = EQUALS;
while(++filter_value_index < raw_value.size() && raw_value[filter_value_index] == ' ');
} else if(raw_value.size() >= 2 && raw_value[0] == '!' && raw_value[1] == '=') {
bool_comparator = NOT_EQUALS;
filter_value_index++;
while(++filter_value_index < raw_value.size() && raw_value[filter_value_index] == ' ');
}
if(filter_value_index != 0) {
raw_value = raw_value.substr(filter_value_index);
}
if(filter_value_index == raw_value.size()) {
return Option<bool>(400, "Error with filter field `" + _field.name +
"`: Filter value cannot be empty.");
}
if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') {
std::vector<std::string> filter_values;
StringUtils::split(raw_value.substr(1, raw_value.size() - 2), filter_values, ",");
@ -2169,14 +2190,15 @@ Option<bool> Collection::parse_filter_query(const std::string& simple_filter_que
filter_value = (filter_value == "true") ? "1" : "0";
f.values.push_back(filter_value);
f.comparators.push_back(EQUALS);
f.comparators.push_back(bool_comparator);
}
} else {
if(raw_value != "true" && raw_value != "false") {
return Option<bool>(400, "Value of filter field `" + _field.name + "` must be `true` or `false`.");
}
std::string bool_value = (raw_value == "true") ? "1" : "0";
f = {field_name, {bool_value}, {EQUALS}};
f = {field_name, {bool_value}, {bool_comparator}};
}
} else if(_field.is_geopoint()) {

View File

@ -1088,7 +1088,33 @@ uint32_t Index::do_filtering(uint32_t** filter_ids_out, const std::vector<filter
size_t value_index = 0;
for(const std::string & filter_value: a_filter.values) {
int64_t bool_int64 = (filter_value == "1") ? 1 : 0;
num_tree->search(a_filter.comparators[value_index], bool_int64, &result_ids, result_ids_len);
if(a_filter.comparators[value_index] == NOT_EQUALS) {
uint32_t* to_exclude_ids = nullptr;
size_t to_exclude_ids_len = 0;
num_tree->search(EQUALS, bool_int64, &to_exclude_ids, to_exclude_ids_len);
auto all_ids = seq_ids.uncompress();
auto all_ids_size = seq_ids.getLength();
uint32_t* excluded_ids = nullptr;
size_t excluded_ids_len = 0;
excluded_ids_len = ArrayUtils::exclude_scalar(all_ids, all_ids_size, to_exclude_ids,
to_exclude_ids_len, &excluded_ids);
delete [] all_ids;
delete [] to_exclude_ids;
uint32_t *out = nullptr;
result_ids_len = ArrayUtils::or_scalar(result_ids, result_ids_len,
excluded_ids, excluded_ids_len, &out);
delete [] result_ids;
result_ids = out;
delete [] excluded_ids;
} else {
num_tree->search(a_filter.comparators[value_index], bool_int64, &result_ids, result_ids_len);
}
value_index++;
}

View File

@ -390,10 +390,10 @@ TEST_F(CollectionFilteringTest, FilterAndQueryFieldRestrictions) {
std::ifstream infile(std::string(ROOT_DIR)+"test/multi_field_documents.jsonl");
std::vector<field> fields = {
field("title", field_types::STRING, false),
field("starring", field_types::STRING, false),
field("cast", field_types::STRING_ARRAY, true),
field("points", field_types::INT32, false)
field("title", field_types::STRING, false),
field("starring", field_types::STRING, false),
field("cast", field_types::STRING_ARRAY, true),
field("points", field_types::INT32, false)
};
coll_mul_fields = collectionManager.get_collection("coll_mul_fields").get();
@ -1464,3 +1464,137 @@ TEST_F(CollectionFilteringTest, NumericalRangeFilter) {
collectionManager.drop_collection("coll1");
}
// Exercises filtering on the `popular` (BOOL) and `bool_array` (BOOL_ARRAY) fields of a collection
// populated from test/bool_documents.jsonl. Covers `:` / `:=` equality, array-valued filters,
// `!=` filters on both fields, and the error returned when the filter value is empty.
TEST_F(CollectionFilteringTest, QueryBoolFields) {
Collection *coll_bool;
std::ifstream infile(std::string(ROOT_DIR)+"test/bool_documents.jsonl");
std::vector<field> fields = {
field("popular", field_types::BOOL, false),
field("title", field_types::STRING, false),
field("rating", field_types::FLOAT, false),
field("bool_array", field_types::BOOL_ARRAY, false),
};
// Sort criteria passed to every search below: `popular` DESC first, then `rating` DESC.
std::vector<sort_by> sort_fields = { sort_by("popular", "DESC"), sort_by("rating", "DESC") };
// Reuse a collection named `coll_bool` if the manager already has one; otherwise create it.
// NOTE(review): the meaning of the `1` and "rating" arguments is not visible here - presumably
// a shard count and the default sorting field; confirm against create_collection().
coll_bool = collectionManager.get_collection("coll_bool").get();
if(coll_bool == nullptr) {
coll_bool = collectionManager.create_collection("coll_bool", 1, fields, "rating").get();
}
// Every line of the fixture file is added as one document; the result of add() is not checked.
std::string json_line;
while (std::getline(infile, json_line)) {
coll_bool->add(json_line);
}
infile.close();
// Plain search with no filters - results should be sorted correctly
query_fields = {"title"};
std::vector<std::string> facets;
nlohmann::json results = coll_bool->search("the", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(5, results["hits"].size());
// Expected document ids, in result order, for the unfiltered search.
std::vector<std::string> ids = {"1", "3", "4", "9", "2"};
for(size_t i = 0; i < results["hits"].size(); i++) {
nlohmann::json result = results["hits"].at(i);
std::string result_id = result["document"]["id"];
std::string id = ids.at(i);
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
// Searching on a bool field
results = coll_bool->search("the", query_fields, "popular:true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(3, results["hits"].size());
ids = {"1", "3", "4"};
for(size_t i = 0; i < results["hits"].size(); i++) {
nlohmann::json result = results["hits"].at(i);
std::string result_id = result["document"]["id"];
std::string id = ids.at(i);
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
// alternative `:=` syntax
results = coll_bool->search("the", query_fields, "popular:=true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(3, results["hits"].size());
// `popular:false` and `popular:= false` (note the space after `:=`) are both expected to return
// 2 hits; the id-order check below runs against the result of the `:= false` form.
results = coll_bool->search("the", query_fields, "popular:false", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(2, results["hits"].size());
results = coll_bool->search("the", query_fields, "popular:= false", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(2, results["hits"].size());
ids = {"9", "2"};
for(size_t i = 0; i < results["hits"].size(); i++) {
nlohmann::json result = results["hits"].at(i);
std::string result_id = result["document"]["id"];
std::string id = ids.at(i);
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
// searching against a bool array field
// should be able to filter with an array of boolean values
Option<nlohmann::json> res_op = coll_bool->search("the", query_fields, "bool_array:[true, false]", facets,
sort_fields, {0}, 10, 1, FREQUENCY, {false});
ASSERT_TRUE(res_op.ok());
results = res_op.get();
// Filtering on both `true` and `false` is expected to return all 5 hits of the unfiltered search.
ASSERT_EQ(5, results["hits"].size());
results = coll_bool->search("the", query_fields, "bool_array: true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(4, results["hits"].size());
ids = {"1", "4", "9", "2"};
for(size_t i = 0; i < results["hits"].size(); i++) {
nlohmann::json result = results["hits"].at(i);
std::string result_id = result["document"]["id"];
std::string id = ids.at(i);
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
// should be able to search using array with a single element boolean value
// (`ids` is still {"1", "4", "9", "2"} from the `bool_array: true` check, so the same hits and
// order are expected here)
results = coll_bool->search("the", query_fields, "bool_array:[true]", facets,
sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(4, results["hits"].size());
for(size_t i = 0; i < results["hits"].size(); i++) {
nlohmann::json result = results["hits"].at(i);
std::string result_id = result["document"]["id"];
std::string id = ids.at(i);
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
// not equals on bool field
// (expects the same two hits, in the same order, as the `popular:= false` check above)
results = coll_bool->search("the", query_fields, "popular:!= true", facets,
sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(2, results["hits"].size());
ASSERT_EQ("9", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("2", results["hits"][1]["document"]["id"].get<std::string>());
// not equals on bool array field
// (document "3" is the only hit of the unfiltered search that is absent from the
// `bool_array: true` result checked earlier)
results = coll_bool->search("the", query_fields, "bool_array:!= [true]", facets,
sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(1, results["hits"].size());
ASSERT_EQ("3", results["hits"][0]["document"]["id"].get<std::string>());
// empty filter value
// (nothing follows `:=`, so the search must fail with the exact message asserted below)
res_op = coll_bool->search("the", query_fields, "bool_array:=", facets,
sort_fields, {0}, 10, 1, FREQUENCY, {false});
ASSERT_FALSE(res_op.ok());
ASSERT_EQ("Error with filter field `bool_array`: Filter value cannot be empty.", res_op.error());
// Drop the collection at the end of the test.
collectionManager.drop_collection("coll_bool");
}

View File

@ -1404,115 +1404,6 @@ TEST_F(CollectionTest, ImportDocuments) {
collectionManager.drop_collection("coll_mul_fields");
}
// Exercises filtering on the `popular` (BOOL) and `bool_array` (BOOL_ARRAY) fields of a collection
// populated from test/bool_documents.jsonl. Covers `:` / `:=` equality on the scalar field and
// scalar / array-valued filters on the array field.
TEST_F(CollectionTest, QueryBoolFields) {
Collection *coll_bool;
std::ifstream infile(std::string(ROOT_DIR)+"test/bool_documents.jsonl");
std::vector<field> fields = {
field("popular", field_types::BOOL, false),
field("title", field_types::STRING, false),
field("rating", field_types::FLOAT, false),
field("bool_array", field_types::BOOL_ARRAY, false),
};
// Sort criteria passed to every search below: `popular` DESC first, then `rating` DESC.
std::vector<sort_by> sort_fields = { sort_by("popular", "DESC"), sort_by("rating", "DESC") };
// Reuse a collection named `coll_bool` if the manager already has one; otherwise create it.
// NOTE(review): the meaning of the `4` and "rating" arguments is not visible here - presumably
// a shard count and the default sorting field; confirm against create_collection().
coll_bool = collectionManager.get_collection("coll_bool").get();
if(coll_bool == nullptr) {
coll_bool = collectionManager.create_collection("coll_bool", 4, fields, "rating").get();
}
// Every line of the fixture file is added as one document; the result of add() is not checked.
std::string json_line;
while (std::getline(infile, json_line)) {
coll_bool->add(json_line);
}
infile.close();
// Plain search with no filters - results should be sorted correctly
query_fields = {"title"};
std::vector<std::string> facets;
nlohmann::json results = coll_bool->search("the", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(5, results["hits"].size());
// Expected document ids, in result order, for the unfiltered search.
std::vector<std::string> ids = {"1", "3", "4", "9", "2"};
for(size_t i = 0; i < results["hits"].size(); i++) {
nlohmann::json result = results["hits"].at(i);
std::string result_id = result["document"]["id"];
std::string id = ids.at(i);
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
// Searching on a bool field
results = coll_bool->search("the", query_fields, "popular:true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(3, results["hits"].size());
ids = {"1", "3", "4"};
for(size_t i = 0; i < results["hits"].size(); i++) {
nlohmann::json result = results["hits"].at(i);
std::string result_id = result["document"]["id"];
std::string id = ids.at(i);
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
// alternative `:=` syntax
results = coll_bool->search("the", query_fields, "popular:=true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(3, results["hits"].size());
results = coll_bool->search("the", query_fields, "popular:false", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(2, results["hits"].size());
ids = {"9", "2"};
for(size_t i = 0; i < results["hits"].size(); i++) {
nlohmann::json result = results["hits"].at(i);
std::string result_id = result["document"]["id"];
std::string id = ids.at(i);
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
// searching against a bool array field
// should be able to filter with an array of boolean values
Option<nlohmann::json> res_op = coll_bool->search("the", query_fields, "bool_array:[true, false]", facets,
sort_fields, {0}, 10, 1, FREQUENCY, {false});
ASSERT_TRUE(res_op.ok());
results = res_op.get();
// Filtering on both `true` and `false` is expected to return all 5 hits of the unfiltered search.
ASSERT_EQ(5, results["hits"].size());
results = coll_bool->search("the", query_fields, "bool_array: true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(4, results["hits"].size());
ids = {"1", "4", "9", "2"};
for(size_t i = 0; i < results["hits"].size(); i++) {
nlohmann::json result = results["hits"].at(i);
std::string result_id = result["document"]["id"];
std::string id = ids.at(i);
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
// should be able to search using array with a single element boolean value
// NOTE(review): `res` (the `bool_array:[true]` result) is never inspected. The assertions below
// run against `results`, which is re-assigned from a repeat of the `bool_array: true` search, so
// the single-element array form is only checked for not crashing, not for its hits.
auto res = coll_bool->search("the", query_fields, "bool_array:[true]", facets,
sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
results = coll_bool->search("the", query_fields, "bool_array: true", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(4, results["hits"].size());
for(size_t i = 0; i < results["hits"].size(); i++) {
nlohmann::json result = results["hits"].at(i);
std::string result_id = result["document"]["id"];
std::string id = ids.at(i);
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
// Drop the collection at the end of the test.
collectionManager.drop_collection("coll_bool");
}
TEST_F(CollectionTest, SearchingWithMissingFields) {
// return error without crashing when searching for fields that do not conform to the schema
Collection *coll_array_fields;