mirror of
https://github.com/typesense/typesense.git
synced 2025-05-19 13:12:22 +08:00
Support exclude filtering for string facets.
This commit is contained in:
parent
25f6fe0614
commit
e6a11f74fc
@ -133,6 +133,7 @@ enum NUM_COMPARATOR {
|
||||
LESS_THAN,
|
||||
LESS_THAN_EQUALS,
|
||||
EQUALS,
|
||||
NOT_EQUALS,
|
||||
CONTAINS,
|
||||
GREATER_THAN,
|
||||
GREATER_THAN_EQUALS,
|
||||
|
@ -1967,6 +1967,9 @@ Option<bool> Collection::parse_geopoint_filter_value(std::string& raw_value,
|
||||
|
||||
Option<bool> Collection::parse_filter_query(const std::string& simple_filter_query,
|
||||
std::vector<filter>& filters) const {
|
||||
|
||||
std::vector<filter> exclude_filters; // to ensure that they go last in the list of filters
|
||||
|
||||
std::vector<std::string> filter_blocks;
|
||||
StringUtils::split(simple_filter_query, filter_blocks, "&&");
|
||||
|
||||
@ -2141,6 +2144,15 @@ Option<bool> Collection::parse_filter_query(const std::string& simple_filter_que
|
||||
// string filter should be evaluated in strict "equals" mode
|
||||
str_comparator = EQUALS;
|
||||
while(raw_value[++filter_value_index] == ' ');
|
||||
} else if(raw_value[0] == '-') {
|
||||
if(!_field.facet) {
|
||||
// EXCLUDE filtering on string is possible only on facet fields
|
||||
return Option<bool>(400, "To perform exclude filtering, filter field `" +
|
||||
_field.name + "` must be a facet field.");
|
||||
}
|
||||
|
||||
str_comparator = NOT_EQUALS;
|
||||
while(raw_value[++filter_value_index] == ' ');
|
||||
}
|
||||
|
||||
if(raw_value[filter_value_index] == '[' && raw_value[raw_value.size() - 1] == ']') {
|
||||
@ -2155,9 +2167,15 @@ Option<bool> Collection::parse_filter_query(const std::string& simple_filter_que
|
||||
"`: Unidentified field data type, see docs for supported data types.");
|
||||
}
|
||||
|
||||
filters.push_back(f);
|
||||
if(f.comparators.size() > 0 && f.comparators.front() == NOT_EQUALS) {
|
||||
exclude_filters.push_back(f);
|
||||
} else {
|
||||
filters.push_back(f);
|
||||
}
|
||||
}
|
||||
|
||||
filters.insert( filters.end(), exclude_filters.begin(), exclude_filters.end() );
|
||||
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
|
@ -1252,7 +1252,7 @@ uint32_t Index::do_filtering(uint32_t** filter_ids_out, const std::vector<filter
|
||||
}
|
||||
}
|
||||
|
||||
if(a_filter.comparators[0] == EQUALS && f.is_facet()) {
|
||||
if((a_filter.comparators[0] == EQUALS || a_filter.comparators[0] == NOT_EQUALS) && f.is_facet()) {
|
||||
// need to do exact match (unlike CONTAINS) by using the facet index
|
||||
// field being a facet is already enforced upstream
|
||||
uint32_t* exact_strt_ids = new uint32_t[strt_ids_size];
|
||||
@ -1305,14 +1305,36 @@ uint32_t Index::do_filtering(uint32_t** filter_ids_out, const std::vector<filter
|
||||
strt_ids_size = exact_strt_size;
|
||||
}
|
||||
|
||||
// Otherwise, we just ensure that given record contains tokens in the filter query
|
||||
// (NOT implemented) if the query is wrapped by double quotes, ensure phrase match
|
||||
// bool exact_match = (filter_value.front() == '"' && filter_value.back() == '"');
|
||||
uint32_t* out = nullptr;
|
||||
ids_size = ArrayUtils::or_scalar(ids, ids_size, strt_ids, strt_ids_size, &out);
|
||||
delete[] strt_ids;
|
||||
delete[] ids;
|
||||
ids = out;
|
||||
if(a_filter.comparators[0] == NOT_EQUALS && f.is_facet()) {
|
||||
// exclude records from existing IDs (from previous filters or ALL records)
|
||||
// upstream will guarantee that NOT_EQUALS is placed right at the end of filters list
|
||||
if(ids == nullptr) {
|
||||
if(filter_ids == nullptr) {
|
||||
ids = seq_ids.uncompress();
|
||||
ids_size = seq_ids.getLength();
|
||||
} else {
|
||||
ids = filter_ids;
|
||||
ids_size = filter_ids_length;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t* excluded_strt_ids = new uint32_t[strt_ids_size];
|
||||
size_t excluded_strt_size = 0;
|
||||
excluded_strt_size = ArrayUtils::exclude_scalar(ids, ids_size, strt_ids,
|
||||
strt_ids_size, &excluded_strt_ids);
|
||||
|
||||
delete [] ids;
|
||||
ids = excluded_strt_ids;
|
||||
ids_size = excluded_strt_size;
|
||||
|
||||
} else {
|
||||
// Otherwise, we just ensure that given record contains tokens in the filter query
|
||||
uint32_t* out = nullptr;
|
||||
ids_size = ArrayUtils::or_scalar(ids, ids_size, strt_ids, strt_ids_size, &out);
|
||||
delete[] strt_ids;
|
||||
delete[] ids;
|
||||
ids = out;
|
||||
}
|
||||
}
|
||||
|
||||
result_ids = ids;
|
||||
|
@ -1289,5 +1289,57 @@ TEST_F(CollectionFilteringTest, NumericalFilteringWithArray) {
|
||||
ASSERT_EQ(4, results["found"].get<size_t>());
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
// Exercises the exclude (`:-`) filter operator on a string field: a single excluded value,
// an excluded value combined with a numeric filter, and a bracketed list of excluded values.
TEST_F(CollectionFilteringTest, NegationOperatorBasics) {
|
||||
Collection *coll1;
|
||||
|
||||
// NOTE(review): the third `field` argument is presumably the facet flag; only `artist` sets it,
// and `artist` is the only field the exclude filters below target. Confirm against `field`.
std::vector<field> fields = {field("title", field_types::STRING, false),
|
||||
field("artist", field_types::STRING, true),
|
||||
field("points", field_types::INT32, false),};
|
||||
|
||||
// Reuse "coll1" if it already exists, otherwise create it with "points" passed as the
// last argument (presumably the default sorting field; confirm against create_collection).
coll1 = collectionManager.get_collection("coll1").get();
|
||||
if(coll1 == nullptr) {
|
||||
coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
|
||||
}
|
||||
|
||||
// Each row is {title, artist}. Row 3 ("Michael Joseph Jackson") shares tokens with row 1
// ("Michael Jackson") but is a different exact string.
std::vector<std::vector<std::string>> records = {
|
||||
{"Taylor Swift Karaoke: reputation", "Taylor Swift"},
|
||||
{"Beat it", "Michael Jackson"},
|
||||
{"Style", "Taylor Swift"},
|
||||
{"Thriller", "Michael Joseph Jackson"},
|
||||
};
|
||||
|
||||
// Index every row; both the document id and `points` are derived from the row index.
for(size_t i=0; i<records.size(); i++) {
|
||||
nlohmann::json doc;
|
||||
|
||||
doc["id"] = std::to_string(i);
|
||||
doc["title"] = records[i][0];
|
||||
doc["artist"] = records[i][1];
|
||||
doc["points"] = i;
|
||||
|
||||
ASSERT_TRUE(coll1->add(doc.dump()).ok());
|
||||
}
|
||||
|
||||
// Single excluded value: only document 1 is expected to be dropped, leaving 3, 2 and 0.
auto results = coll1->search("*", {"artist"}, "artist:- Michael Jackson", {}, {}, 0, 10, 1, FREQUENCY, true, 10).get();
|
||||
|
||||
ASSERT_EQ(3, results["found"].get<size_t>());
|
||||
|
||||
// Expected hit order is 3, 2, 0 (presumably descending `points`; confirm ranking rules).
ASSERT_STREQ("3", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("2", results["hits"][1]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("0", results["hits"][2]["document"]["id"].get<std::string>().c_str());
|
||||
|
||||
// Exclude filter written before a numeric filter: `points: >0` additionally removes document 0.
results = coll1->search("*", {"artist"}, "artist:- Michael Jackson && points: >0", {}, {}, 0, 10, 1, FREQUENCY, true, 10).get();
|
||||
ASSERT_EQ(2, results["found"].get<size_t>());
|
||||
ASSERT_STREQ("3", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("2", results["hits"][1]["document"]["id"].get<std::string>().c_str());
|
||||
|
||||
// negation operation on multiple values
// Both listed artists are excluded, so only document 3 is expected to remain.
|
||||
|
||||
results = coll1->search("*", {"artist"}, "artist:- [Michael Jackson, Taylor Swift]", {}, {}, 0, 10, 1, FREQUENCY, true, 10).get();
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
ASSERT_STREQ("3", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
|
||||
// Drop the collection at the end of the test.
collectionManager.drop_collection("coll1");
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user