mirror of
https://github.com/typesense/typesense.git
synced 2025-05-21 06:02:26 +08:00
Don't highlight very large docs (more than ~64K words, estimated as text length / 4).
This commit is contained in:
parent
ec4311635e
commit
f9242dd4a5
@ -4067,7 +4067,8 @@ bool Collection::handle_highlight_text(std::string& text, bool normalise, const
|
||||
// ensures that the `snippet_start_offset` is always from a matched token, and not from query suggestion
|
||||
bool match_offset_found = (found_first_match && token_already_found) ||
|
||||
(match_offset_index <= last_valid_offset_index &&
|
||||
match.offsets[match_offset_index].offset == raw_token_index);
|
||||
match.offsets[match_offset_index].offset == raw_token_index &&
|
||||
text_len/4 < 64000);
|
||||
|
||||
// Token might not appear in the best matched window, which is limited to a size of 10.
|
||||
// If field is marked to be highlighted fully, or field length exceeds snippet_threshold, we will
|
||||
|
@ -2761,6 +2761,37 @@ TEST_F(CollectionSpecificMoreTest, DisableTyposForNumericalTokens) {
|
||||
ASSERT_EQ(2, res_op.get()["hits"].size());
|
||||
}
|
||||
|
||||
// Checks that a document with one very long `description` field is still returned
// as a hit for a matching query, but carries no highlight entries.
TEST_F(CollectionSpecificMoreTest, DisableHighlightForLongFields) {
|
||||
nlohmann::json schema = R"({
|
||||
"name": "coll1",
|
||||
"fields": [
|
||||
{"name": "description", "type": "string"}
|
||||
]
|
||||
})"_json;
|
||||
|
||||
Collection* coll1 = collectionManager.create_collection(schema).get();
|
||||
|
||||
// Build the long field value: 100,000 space-separated tokens, each produced by
// StringUtils::randstring(4) (presumably a random 4-character string -- confirm).
std::string description;
|
||||
for(size_t i = 0; i < 100*1000; i++) {
|
||||
description += StringUtils::randstring(4) + " ";
|
||||
}
|
||||
|
||||
// Append the token that the query below searches for, at the very end of the text.
description += "foobar";
|
||||
|
||||
nlohmann::json doc;
|
||||
doc["description"] = description;
|
||||
ASSERT_TRUE(coll1->add(doc.dump()).ok());
|
||||
|
||||
// Query the `description` field for the trailing token.
auto res_op = coll1->search("foobar", {"description"}, "", {},
|
||||
{}, {2}, 10, 1,FREQUENCY, {true},
|
||||
Index::DROP_TOKENS_THRESHOLD, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "");
|
||||
|
||||
ASSERT_TRUE(res_op.ok());
|
||||
// The document must still match...
ASSERT_EQ(1, res_op.get()["hits"].size());
|
||||
// ...but its highlight list must be empty (per the commit message, very large docs are not highlighted).
ASSERT_EQ(0, res_op.get()["hits"][0]["highlight"].size());
|
||||
}
|
||||
|
||||
TEST_F(CollectionSpecificMoreTest, TestStemming) {
|
||||
nlohmann::json schema = R"({
|
||||
"name": "test",
|
||||
|
Loading…
x
Reference in New Issue
Block a user