Fix filtering on stemmed fields (#1776)

* Fix filtering on stemmed fields

* Fix getting stemmer twice
This commit is contained in:
Ozan Armağan 2024-06-06 14:19:12 +03:00 committed by GitHub
parent b4b399a4b7
commit 5859e0fd3f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 44 additions and 2 deletions

View File

@ -639,10 +639,20 @@ Option<bool> toFilter(const std::string expression,
return Option<bool>(400, "Error with filter field `" + _field.name +
"`: Filter value array cannot be empty.");
}
if(_field.stem) {
auto stemmer = _field.get_stemmer();
for (std::string& filter_value: filter_values) {
filter_value = stemmer->stem(filter_value);
}
}
filter_exp = {field_name, filter_values, {str_comparator}};
} else {
filter_exp = {field_name, {raw_value.substr(filter_value_index)}, {str_comparator}};
std::string filter_value = raw_value.substr(filter_value_index);
if(_field.stem) {
auto stemmer = _field.get_stemmer();
filter_value = stemmer->stem(filter_value);
}
filter_exp = {field_name, {filter_value}, {str_comparator}};
}
filter_exp.apply_not_equals = apply_not_equals;

View File

@ -2564,3 +2564,35 @@ TEST_F(CollectionFilteringTest, PrefixFilterOnTextFields) {
ASSERT_EQ(id, result_id);
}
}
// Checks that an exact-match filter (`:=`) on a field declared with
// `"stem": true` still selects the document holding the filtered value.
TEST_F(CollectionFilteringTest, FilterOnStemmedField) {
    // Single faceted string[] field with stemming switched on.
    nlohmann::json companies_schema = R"({
"name": "companies",
"fields": [
{"name": "keywords", "type": "string[]", "facet": true, "stem": true }
]
})"_json;

    auto create_op = collectionManager.create_collection(companies_schema);
    ASSERT_TRUE(create_op.ok());
    auto companies = create_op.get();

    // Two documents whose keywords differ, so the filter must pick exactly one.
    nlohmann::json restaurant_doc = {
        {"id", "124"},
        {"keywords", {"Restaurant"}}
    };
    nlohmann::json baking_doc = {
        {"id", "125"},
        {"keywords", {"Baking"}}
    };

    ASSERT_TRUE(companies->add(restaurant_doc.dump()).ok());
    ASSERT_TRUE(companies->add(baking_doc.dump()).ok());

    // Wildcard query; only the filter expression narrows the result set.
    auto search_op = companies->search("*", {}, "keywords:=Baking", {}, {}, {0}, 10, 1, FREQUENCY, {false});
    auto search_res = search_op.get();

    // Only the "Baking" document (id 125) is expected back.
    ASSERT_EQ(1, search_res["hits"].size());
    ASSERT_EQ("125", search_res["hits"][0]["document"]["id"].get<std::string>());
}