Disable highlighting for very long fields

src/collection.cpp: in Collection::handle_highlight_text, the second
alternative of `match_offset_found` now additionally requires
`text_len/4 < 64000`. When that bound is not met, an offset equal to
`raw_token_index` no longer counts as a found match offset.
NOTE(review): the divisor 4 and the limit 64000 are unexplained magic
numbers in the hunk; presumably a rough token-count estimate checked
against an offset limit. Confirm, and consider a named constant.

test/collection_specific_more_test.cpp: adds DisableHighlightForLongFields.
It indexes one document whose `description` is built from 100*1000
`StringUtils::randstring(4)` tokens followed by "foobar", searches for
"foobar", and expects exactly one hit whose "highlight" object is empty.

Formatting note: line breaks and indentation of this patch were
reconstructed, so context lines may differ from the target files in
whitespace only; apply with `git apply --ignore-whitespace` if needed.

diff --git a/src/collection.cpp b/src/collection.cpp
index 1bab550d..7b2ba723 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -4067,7 +4067,8 @@ bool Collection::handle_highlight_text(std::string& text, bool normalise, const
         // ensures that the `snippet_start_offset` is always from a matched token, and not from query suggestion
         bool match_offset_found = (found_first_match && token_already_found) ||
                                   (match_offset_index <= last_valid_offset_index &&
-                                   match.offsets[match_offset_index].offset == raw_token_index);
+                                   match.offsets[match_offset_index].offset == raw_token_index &&
+                                   text_len/4 < 64000);
 
         // Token might not appear in the best matched window, which is limited to a size of 10.
         // If field is marked to be highlighted fully, or field length exceeds snippet_threshold, we will
diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp
index bcc29a7c..1e286e96 100644
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@@ -2761,6 +2761,37 @@ TEST_F(CollectionSpecificMoreTest, DisableTyposForNumericalTokens) {
     ASSERT_EQ(2, res_op.get()["hits"].size());
 }
 
+TEST_F(CollectionSpecificMoreTest, DisableHighlightForLongFields) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "fields": [
+            {"name": "description", "type": "string"}
+        ]
+    })"_json;
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    std::string description;
+    for(size_t i = 0; i < 100*1000; i++) {
+        description += StringUtils::randstring(4) + " ";
+    }
+
+    description += "foobar";
+
+    nlohmann::json doc;
+    doc["description"] = description;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    auto res_op = coll1->search("foobar", {"description"}, "", {},
+                                {}, {2}, 10, 1,FREQUENCY, {true},
+                                Index::DROP_TOKENS_THRESHOLD, spp::sparse_hash_set<std::string>(),
+                                spp::sparse_hash_set<std::string>(), 10, "");
+
+    ASSERT_TRUE(res_op.ok());
+    ASSERT_EQ(1, res_op.get()["hits"].size());
+    ASSERT_EQ(0, res_op.get()["hits"][0]["highlight"].size());
+}
+
 TEST_F(CollectionSpecificMoreTest, TestStemming) {
     nlohmann::json schema = R"({
         "name": "test",