Handle special characters in prefix highlighting.

This commit is contained in:
Kishore Nallan 2023-03-06 13:31:38 +05:30
parent f48b9ffe6e
commit cfbcbfc6fb
4 changed files with 39 additions and 0 deletions

View File

@ -85,4 +85,6 @@ public:
}
void decr_token_counter();
bool should_skip_char(char c);
};

View File

@ -2832,6 +2832,12 @@ bool Collection::handle_highlight_text(std::string& text, bool normalise, const
// group unicode code points and calculate number of actual characters
while(k <= tok_end) {
k++;
if(tokenizer.should_skip_char(text[k])) {
// used to handle special characters inside a tokenized word, e.g. `foo-bar`
continue;
}
if ((text[k] & 0xC0) == 0x80) k++;
if ((text[k] & 0xC0) == 0x80) k++;
if ((text[k] & 0xC0) == 0x80) k++;

View File

@ -334,3 +334,7 @@ void Tokenizer::decr_token_counter() {
token_counter--;
}
}
// Reports whether `c` should be skipped by the caller: true only when `c` is
// an ASCII character (per is_ascii_char) whose stream mode is not INDEX.
bool Tokenizer::should_skip_char(char c) {
    if(!is_ascii_char(c)) {
        // Non-ASCII input is never skipped; its stream mode is not consulted.
        return false;
    }

    return get_stream_mode(c) != INDEX;
}

View File

@ -1338,6 +1338,33 @@ TEST_F(CollectionSpecificMoreTest, CopyDocHelper) {
ASSERT_EQ(1, dst.count("foo.bar"));
}
// Checks the highlighted snippet produced for a title whose leading word
// contains symbols (`var(--icon-secondary-neutral);`): searching for "favicon"
// must return one hit whose snippet wraps `var(--icon` in <mark> tags.
TEST_F(CollectionSpecificMoreTest, HighlightWordWithSymbols) {
// Collection with a single string field, `title`.
nlohmann::json schema = R"({
"name": "coll1",
"fields": [
{"name": "title", "type": "string"}
]
})"_json;
Collection *coll1 = collectionManager.create_collection(schema).get();
// Index one document whose title starts with a symbol-laden token.
nlohmann::json doc;
doc["title"] = "var(--icon-secondary-neutral); For components with";
ASSERT_TRUE(coll1->add(doc.dump()).ok());
// Query "favicon" against `title`, using "<mark>"/"</mark>" as the highlight tags.
// NOTE(review): the remaining positional arguments are passed through as-is;
// their meaning depends on Collection::search's signature, which is not visible here.
auto res = coll1->search("favicon", {"title"}, "", {}, {}, {2}, 10, 1,
FREQUENCY, {true},
10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "locations.address",
20, {}, {}, {}, 0, "<mark>", "</mark>", {}, 1000, true, false, true,
"title").get();
// Exactly one hit is expected.
ASSERT_EQ(1, res["hits"].size());
// The mark must close right after `icon`, leaving `-secondary-neutral);` and
// the rest of the title outside the highlight.
ASSERT_EQ("<mark>var(--icon</mark>-secondary-neutral); For components with",
res["hits"][0]["highlight"]["title"]["snippet"].get<std::string>());
}
TEST_F(CollectionSpecificMoreTest, HighlightObjectShouldBeEmptyWhenNoHighlightFieldFound) {
nlohmann::json schema = R"({
"name": "coll1",