Use token separators and symbols during filtering.

This commit is contained in:
Kishore Nallan 2021-10-29 15:49:22 +05:30
parent ba91e69c04
commit 62be28b5c3
2 changed files with 49 additions and 1 deletions

View File

@ -1493,7 +1493,7 @@ void Index::do_filtering(uint32_t*& filter_ids, uint32_t& filter_ids_length,
// there could be multiple tokens in a filter value, which we have to treat as ANDs
// e.g. country: South Africa
Tokenizer tokenizer(filter_value, true, false, f.locale);
Tokenizer tokenizer(filter_value, true, false, f.locale, symbols_to_index, token_separators);
std::string str_token;
size_t token_index = 0;

View File

@ -1724,6 +1724,12 @@ TEST_F(CollectionFilteringTest, FilterStringsWithComma) {
ASSERT_EQ(1, results["found"].get<size_t>());
ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
results = coll1->search("*", {"place"}, "place:= `St. John's Cathedral, Denver, Colorado`", {}, {}, {0}, 10, 1,
FREQUENCY, {true}, 10).get();
ASSERT_EQ(1, results["found"].get<size_t>());
ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
results = coll1->search("*", {"place"}, "place:= [`St. John's Cathedral, Denver, Colorado`]", {}, {}, {0}, 10, 1,
FREQUENCY, {true}, 10).get();
@ -1914,3 +1920,45 @@ TEST_F(CollectionFilteringTest, QueryBoolFields) {
collectionManager.drop_collection("coll_bool");
}
// Verifies that an exact-match (`:=`) filter on a string value containing "."
// still finds the document when "." is registered on the collection through
// either of the last two create_collection() arguments.
TEST_F(CollectionFilteringTest, FilteringWithTokenSeparators) {
    // Runs a wildcard search restricted by `filter_query` and reports how many hits came back.
    auto hit_count = [](Collection* coll, const std::string& filter_query) {
        auto response = coll->search("*", {}, filter_query, {}, {}, {0}, 10,
                                     1, FREQUENCY, {false}).get();
        return response["hits"].size();
    };

    std::vector<field> schema = {field("code", field_types::STRING, true)};

    // The same document is indexed into both collections below.
    nlohmann::json record;
    record["id"] = "0";
    record["code"] = "7318.15";

    // Case 1: "." passed as the final argument
    // (presumably token_separators -- confirm against create_collection's signature).
    Collection* separator_coll = collectionManager.create_collection(
        "coll1", 1, schema, "", 0, "", {}, {"."}
    ).get();

    ASSERT_TRUE(separator_coll->add(record.dump()).ok());

    // Both the plain and the backtick-quoted filter value must match.
    ASSERT_EQ(1, hit_count(separator_coll, "code:=7318.15"));
    ASSERT_EQ(1, hit_count(separator_coll, "code:=`7318.15`"));

    collectionManager.drop_collection("coll1");

    // Case 2: "." passed as the second-to-last argument
    // (presumably symbols_to_index -- confirm against create_collection's signature).
    Collection* symbol_coll = collectionManager.create_collection(
        "coll2", 1, schema, "", 0, "", {"."}, {}
    ).get();

    ASSERT_TRUE(symbol_coll->add(record.dump()).ok());

    ASSERT_EQ(1, hit_count(symbol_coll, "code:=7318.15"));

    collectionManager.drop_collection("coll2");
}