Merge pull request #1100 from happy-san/v0.25-join

Trim down filter token to 100 characters.
This commit is contained in:
Kishore Nallan 2023-07-12 18:40:52 +05:30 committed by GitHub
commit 2b204112ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 1 deletion

View File

@ -1862,6 +1862,9 @@ Option<bool> Index::do_filtering(filter_node_t* const root,
std::vector<std::string> str_tokens;
while (tokenizer.next(str_token, token_index)) {
if (str_token.size() > 100) {
str_token.erase(100);
}
str_tokens.push_back(str_token);
art_leaf* leaf = (art_leaf *) art_search(t, (const unsigned char*) str_token.c_str(),

View File

@ -2650,4 +2650,43 @@ TEST_F(CollectionFilteringTest, ComplexFilterQuery) {
ASSERT_EQ("`filter_by` has too many operations.", search_op.error());
collectionManager.drop_collection("ComplexFilterQueryCollection");
}
}
// Regression test for filtering on a string token longer than 100 characters.
//
// A single document is indexed whose `uri` value is one token of more than 100
// characters ("/" and "-" are declared as symbols to index, so the value is not
// split on them). The test then checks that:
//   * filtering by the full token matches the document,
//   * filtering by the first 100 characters of the token still matches,
//   * filtering by only the first 99 characters does not match.
//
// Every search Option is asserted to be ok() before its hits are inspected, so
// that the final zero-hit expectation cannot be satisfied by a failed search
// whose result carries no "hits" array.
TEST_F(CollectionFilteringTest, LargeFilterToken) {
    nlohmann::json json =
R"({
"name": "LargeFilterTokenCollection",
"fields": [
{"name": "uri", "type": "string"}
],
"symbols_to_index": [
"/",
"-"
]
})"_json;

    auto op = collectionManager.create_collection(json);
    ASSERT_TRUE(op.ok());
    auto coll = op.get();

    // Reuse the JSON object for the document body.
    json.clear();
    std::string token = "rade/aols/insolvenzrecht/persoenliche-risiken-fuer-organe-von-kapitalgesellschaften-gmbh-"
                        "geschaeftsfuehrer-ag-vorstand";
    json["uri"] = token;

    auto add_op = coll->add(json.dump());
    ASSERT_TRUE(add_op.ok());

    // Runs a wildcard search restricted by `filter_by`; all other arguments are fixed.
    auto search_with_filter = [&](const std::string& filter_by) {
        return coll->search("*", query_fields, filter_by, {}, sort_fields, {0}, 10, 1, FREQUENCY, {false});
    };

    // Sanity check: without a filter the single document is returned.
    auto search_op = search_with_filter("");
    ASSERT_TRUE(search_op.ok());
    auto results = search_op.get();
    ASSERT_EQ(1, results["hits"].size());

    // The full (over-long) token matches.
    search_op = search_with_filter("uri:" + token);
    ASSERT_TRUE(search_op.ok());
    results = search_op.get();
    ASSERT_EQ(1, results["hits"].size());

    token.erase(100); // Max token length that's indexed is 100, we'll still get a match.
    search_op = search_with_filter("uri:" + token);
    ASSERT_TRUE(search_op.ok());
    results = search_op.get();
    ASSERT_EQ(1, results["hits"].size());

    // One character short of the 100-character prefix must not match.
    token.erase(99);
    search_op = search_with_filter("uri:" + token);
    ASSERT_TRUE(search_op.ok());
    results = search_op.get();
    ASSERT_EQ(0, results["hits"].size());
}