From 5859e0fd3fff37a0b2d54f0cbf2eb24e6754e784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ozan=20Arma=C4=9Fan?= <70442658+ozanarmagan@users.noreply.github.com> Date: Thu, 6 Jun 2024 14:19:12 +0300 Subject: [PATCH] Fix filtering on stemmed fields (#1776) * Fix filtering on stemmed fields * Fix getting stemmer twice --- src/filter.cpp | 14 +++++++++++-- test/collection_filtering_test.cpp | 32 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/filter.cpp b/src/filter.cpp index cba4e83d..7d55ef91 100644 --- a/src/filter.cpp +++ b/src/filter.cpp @@ -639,10 +639,20 @@ Option<bool> toFilter(const std::string expression, return Option<bool>(400, "Error with filter field `" + _field.name + "`: Filter value array cannot be empty."); } - + if(_field.stem) { + auto stemmer = _field.get_stemmer(); + for (std::string& filter_value: filter_values) { + filter_value = stemmer->stem(filter_value); + } + } filter_exp = {field_name, filter_values, {str_comparator}}; } else { - filter_exp = {field_name, {raw_value.substr(filter_value_index)}, {str_comparator}}; + std::string filter_value = raw_value.substr(filter_value_index); + if(_field.stem) { + auto stemmer = _field.get_stemmer(); + filter_value = stemmer->stem(filter_value); + } + filter_exp = {field_name, {filter_value}, {str_comparator}}; } filter_exp.apply_not_equals = apply_not_equals; diff --git a/test/collection_filtering_test.cpp b/test/collection_filtering_test.cpp index 95799dbf..56953d84 100644 --- a/test/collection_filtering_test.cpp +++ b/test/collection_filtering_test.cpp @@ -2564,3 +2564,35 @@ TEST_F(CollectionFilteringTest, PrefixFilterOnTextFields) { ASSERT_EQ(id, result_id); } } + +TEST_F(CollectionFilteringTest, FilterOnStemmedField) { + nlohmann::json schema = R"({ + "name": "companies", + "fields": [ + {"name": "keywords", "type": "string[]", "facet": true, "stem": true } + ] + })"_json; + + auto op = collectionManager.create_collection(schema); + ASSERT_TRUE(op.ok()); + + auto 
coll = op.get(); + + nlohmann::json doc1 = { + {"id", "124"}, + {"keywords", {"Restaurant"}} + }; + + nlohmann::json doc2 = { + {"id", "125"}, + {"keywords", {"Baking"}} + }; + + ASSERT_TRUE(coll->add(doc1.dump()).ok()); + ASSERT_TRUE(coll->add(doc2.dump()).ok()); + + auto results = coll->search("*", {}, "keywords:=Baking", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get(); + ASSERT_EQ(1, results["hits"].size()); + ASSERT_EQ("125", results["hits"][0]["document"]["id"].get<std::string>()); + +} \ No newline at end of file