Limit token size to handle bad data.

Kishore Nallan 2023-03-03 17:31:13 +05:30
parent 79ddbd56de
commit 15d3da7203


@@ -1146,6 +1146,10 @@ void Index::tokenize_string_with_facets(const std::string& text, bool is_facet,
                 continue;
             }
+            if(token.size() > 100) {
+                token.erase(100);
+            }
             token_to_offsets[token].push_back(token_index + 1);
             last_token = token;
@@ -1191,6 +1195,10 @@ void Index::tokenize_string_array_with_facets(const std::vector<std::string>& st
                 continue;
             }
+            if(token.size() > 100) {
+                token.erase(100);
+            }
             token_to_offsets[token].push_back(token_index + 1);
             token_set.insert(token);
             last_token = token;
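
For context, std::string::erase(pos) removes everything from position pos to the end of the string, so the added guard caps each token at its first 100 bytes before it is recorded in token_to_offsets. Below is a minimal standalone sketch of that truncation behaviour; it is illustrative only and not part of the commit, with the 100-byte cap and the token variable name mirroring the diff.

    // Sketch: truncate an overly long token the same way the patch does.
    #include <iostream>
    #include <string>

    int main() {
        std::string token(250, 'a');   // simulate a "bad data" token that is far too long

        if(token.size() > 100) {
            token.erase(100);          // drop everything past the first 100 bytes
        }

        std::cout << token.size() << std::endl;   // prints 100
        return 0;
    }

Truncating in place keeps the token usable as an index key while bounding its size, rather than skipping the value outright.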