Support exclude filtering for string facets.

This commit is contained in:
Kishore Nallan 2021-05-07 13:32:03 +05:30
parent 25f6fe0614
commit e6a11f74fc
4 changed files with 103 additions and 10 deletions

View File

@ -133,6 +133,7 @@ enum NUM_COMPARATOR {
LESS_THAN,
LESS_THAN_EQUALS,
EQUALS,
NOT_EQUALS,
CONTAINS,
GREATER_THAN,
GREATER_THAN_EQUALS,

View File

@ -1967,6 +1967,9 @@ Option<bool> Collection::parse_geopoint_filter_value(std::string& raw_value,
Option<bool> Collection::parse_filter_query(const std::string& simple_filter_query,
std::vector<filter>& filters) const {
std::vector<filter> exclude_filters; // to ensure that they go last in the list of filters
std::vector<std::string> filter_blocks;
StringUtils::split(simple_filter_query, filter_blocks, "&&");
@ -2141,6 +2144,15 @@ Option<bool> Collection::parse_filter_query(const std::string& simple_filter_que
// string filter should be evaluated in strict "equals" mode
str_comparator = EQUALS;
while(raw_value[++filter_value_index] == ' ');
} else if(raw_value[0] == '-') {
if(!_field.facet) {
// EXCLUDE filtering on string is possible only on facet fields
return Option<bool>(400, "To perform exclude filtering, filter field `" +
_field.name + "` must be a facet field.");
}
str_comparator = NOT_EQUALS;
while(raw_value[++filter_value_index] == ' ');
}
if(raw_value[filter_value_index] == '[' && raw_value[raw_value.size() - 1] == ']') {
@ -2155,9 +2167,15 @@ Option<bool> Collection::parse_filter_query(const std::string& simple_filter_que
"`: Unidentified field data type, see docs for supported data types.");
}
filters.push_back(f);
if(f.comparators.size() > 0 && f.comparators.front() == NOT_EQUALS) {
exclude_filters.push_back(f);
} else {
filters.push_back(f);
}
}
filters.insert( filters.end(), exclude_filters.begin(), exclude_filters.end() );
return Option<bool>(true);
}

View File

@ -1252,7 +1252,7 @@ uint32_t Index::do_filtering(uint32_t** filter_ids_out, const std::vector<filter
}
}
if(a_filter.comparators[0] == EQUALS && f.is_facet()) {
if((a_filter.comparators[0] == EQUALS || a_filter.comparators[0] == NOT_EQUALS) && f.is_facet()) {
// need to do exact match (unlike CONTAINS) by using the facet index
// field being a facet is already enforced upstream
uint32_t* exact_strt_ids = new uint32_t[strt_ids_size];
@ -1305,14 +1305,36 @@ uint32_t Index::do_filtering(uint32_t** filter_ids_out, const std::vector<filter
strt_ids_size = exact_strt_size;
}
// Otherwise, we just ensure that given record contains tokens in the filter query
// (NOT implemented) if the query is wrapped by double quotes, ensure phrase match
// bool exact_match = (filter_value.front() == '"' && filter_value.back() == '"');
uint32_t* out = nullptr;
ids_size = ArrayUtils::or_scalar(ids, ids_size, strt_ids, strt_ids_size, &out);
delete[] strt_ids;
delete[] ids;
ids = out;
if(a_filter.comparators[0] == NOT_EQUALS && f.is_facet()) {
// exclude records from existing IDs (from previous filters or ALL records)
// upstream will guarantee that NOT_EQUALS is placed right at the end of filters list
if(ids == nullptr) {
if(filter_ids == nullptr) {
ids = seq_ids.uncompress();
ids_size = seq_ids.getLength();
} else {
ids = filter_ids;
ids_size = filter_ids_length;
}
}
uint32_t* excluded_strt_ids = new uint32_t[strt_ids_size];
size_t excluded_strt_size = 0;
excluded_strt_size = ArrayUtils::exclude_scalar(ids, ids_size, strt_ids,
strt_ids_size, &excluded_strt_ids);
delete [] ids;
ids = excluded_strt_ids;
ids_size = excluded_strt_size;
} else {
// Otherwise, we just ensure that given record contains tokens in the filter query
uint32_t* out = nullptr;
ids_size = ArrayUtils::or_scalar(ids, ids_size, strt_ids, strt_ids_size, &out);
delete[] strt_ids;
delete[] ids;
ids = out;
}
}
result_ids = ids;

View File

@ -1289,5 +1289,57 @@ TEST_F(CollectionFilteringTest, NumericalFilteringWithArray) {
ASSERT_EQ(4, results["found"].get<size_t>());
ASSERT_EQ(4, results["hits"].size());
collectionManager.drop_collection("coll1");
}
// Exercises the `-` (exclude) filter operator on the string field `artist`:
// on its own, combined with a numeric filter, and with a list of values.
TEST_F(CollectionFilteringTest, NegationOperatorBasics) {
    std::vector<field> fields = {
        field("title", field_types::STRING, false),
        field("artist", field_types::STRING, true),
        field("points", field_types::INT32, false),
    };

    // Reuse the collection when one is already registered, otherwise create it.
    Collection* coll1 = collectionManager.get_collection("coll1").get();
    if(coll1 == nullptr) {
        coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
    }

    // Each row is {title, artist}; the row index doubles as document id and `points`.
    std::vector<std::vector<std::string>> records = {
        {"Taylor Swift Karaoke: reputation", "Taylor Swift"},
        {"Beat it", "Michael Jackson"},
        {"Style", "Taylor Swift"},
        {"Thriller", "Michael Joseph Jackson"},
    };

    for(size_t i = 0; i < records.size(); i++) {
        const std::vector<std::string>& row = records[i];

        nlohmann::json doc;
        doc["id"] = std::to_string(i);
        doc["title"] = row[0];
        doc["artist"] = row[1];
        doc["points"] = i;

        ASSERT_TRUE(coll1->add(doc.dump()).ok());
    }

    // Every search below differs only in the filter expression.
    const auto search_with_filter = [&](const std::string& filter_query) {
        return coll1->search("*", {"artist"}, filter_query, {}, {}, 0, 10, 1, FREQUENCY, true, 10).get();
    };

    auto results = search_with_filter("artist:- Michael Jackson");

    // Reads the document id of the hit at `pos` from the most recent results.
    const auto hit_id = [&results](size_t pos) {
        return results["hits"][pos]["document"]["id"].get<std::string>();
    };

    // excluding a single value
    ASSERT_EQ(3, results["found"].get<size_t>());
    ASSERT_STREQ("3", hit_id(0).c_str());
    ASSERT_STREQ("2", hit_id(1).c_str());
    ASSERT_STREQ("0", hit_id(2).c_str());

    // exclusion combined with a numeric filter
    results = search_with_filter("artist:- Michael Jackson && points: >0");
    ASSERT_EQ(2, results["found"].get<size_t>());
    ASSERT_STREQ("3", hit_id(0).c_str());
    ASSERT_STREQ("2", hit_id(1).c_str());

    // negation operation on multiple values
    results = search_with_filter("artist:- [Michael Jackson, Taylor Swift]");
    ASSERT_EQ(1, results["found"].get<size_t>());
    ASSERT_STREQ("3", hit_id(0).c_str());

    collectionManager.drop_collection("coll1");
}