diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index ec050be6..bd1543e9 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -61,6 +61,11 @@ bool Tokenizer::next(std::string &token, size_t& token_index) {
         *p = 0;
         size_t insize = (p - &inbuf[0]);
 
+        if(!normalize) {
+            out << inbuf;
+            continue;
+        }
+
         char outbuf[5] = {};
         size_t outsize = sizeof(outbuf);
         char *outptr = outbuf;
diff --git a/test/collection_test.cpp b/test/collection_test.cpp
index 76d7ad80..0201ef0b 100644
--- a/test/collection_test.cpp
+++ b/test/collection_test.cpp
@@ -2637,3 +2637,41 @@ TEST_F(CollectionTest, MultiFieldRelevance) {
 
     collectionManager.drop_collection("coll1");
 }
+
+TEST_F(CollectionTest, HighlightWithAccentedCharacters) {
+    Collection *coll1;
+
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    coll1 = collectionManager.get_collection("coll1");
+    if (coll1 == nullptr) {
+        coll1 = collectionManager.create_collection("coll1", 4, fields, "points").get();
+    }
+
+    std::vector<std::vector<std::string>> records = {
+        {"Mise à jour Timy depuis PC"},
+        {"Down There by the Train"},
+        {"State Trooper"},
+    };
+
+    for (size_t i = 0; i < records.size(); i++) {
+        nlohmann::json doc;
+
+        doc["id"] = std::to_string(i);
+        doc["title"] = records[i][0];
+        doc["points"] = i;
+
+        ASSERT_TRUE(coll1->add(doc.dump()).ok());
+    }
+
+    auto results = coll1->search("jour", {"title"}, "", {}, {}, 0, 10, 1, FREQUENCY).get();
+
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    ASSERT_EQ(1, results["hits"].size());
+
+    ASSERT_STREQ("Mise à jour Timy depuis PC",
+                 results["hits"][0]["highlights"][0]["snippet"].get<std::string>().c_str());
+
+    collectionManager.drop_collection("coll1");
+}
diff --git a/test/tokenizer_test.cpp b/test/tokenizer_test.cpp
index c9c0ea74..e7f794c7 100644
--- a/test/tokenizer_test.cpp
+++ b/test/tokenizer_test.cpp
@@ -42,6 +42,26 @@ TEST(TokenizerTest, ShouldTokenizeNormalizeDifferentStrings) {
     ASSERT_STREQ("abcaa123ss12", tokens[5].c_str());
     ASSERT_STREQ("here", tokens[6].c_str());
 
+    // when normalization is disabled and keep_empty is enabled
+    const std::string withoutnormalize = "Mise à jour.";
+    tokens.clear();
+    Tokenizer(withoutnormalize, true, false, false).tokenize(tokens);
+    ASSERT_EQ(5, tokens.size());
+    ASSERT_STREQ("Mise", tokens[0].c_str());
+    ASSERT_STREQ("", tokens[1].c_str());
+    ASSERT_STREQ("à", tokens[2].c_str());
+    ASSERT_STREQ("", tokens[3].c_str());
+    ASSERT_STREQ("jour.", tokens[4].c_str());
+
+    // when normalization and keep_empty are both disabled
+    const std::string withoutnormalizeandkeepempty = "Mise à jour.";
+    tokens.clear();
+    Tokenizer(withoutnormalizeandkeepempty, false, false, false).tokenize(tokens);
+    ASSERT_EQ(3, tokens.size());
+    ASSERT_STREQ("Mise", tokens[0].c_str());
+    ASSERT_STREQ("à", tokens[1].c_str());
+    ASSERT_STREQ("jour.", tokens[2].c_str());
+
     // noop
     tokens.clear();
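
Reviewer note: a minimal usage sketch of the new flag, not part of the diff. It assumes the positional constructor order exercised in tokenizer_test.cpp above (input, keep_empty, normalize, no_op) and that the class is declared in a "tokenizer.h" header; both are assumptions drawn from this change, not verified API documentation.

// Sketch only: mirrors the second new tokenizer test case
// (keep_empty=false, normalize=false, no_op=false), where accented
// characters such as "à" should come through verbatim instead of
// being transliterated by the normalization path.
#include <iostream>
#include <string>
#include <vector>
#include "tokenizer.h"   // assumed location of the Tokenizer class

int main() {
    std::vector<std::string> tokens;

    Tokenizer("Mise à jour.", false, false, false).tokenize(tokens);

    for(const auto& t : tokens) {
        std::cout << t << "\n";  // expected: Mise / à / jour.
    }
    return 0;
}

The early return path added to Tokenizer::next() writes the raw input bytes straight to the token stream and skips the conversion step that follows, which is what lets the accented snippet in the HighlightWithAccentedCharacters collection test round-trip unchanged.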