From 7c4d0f928605678a1fb2368f2b2077b75f34cd3a Mon Sep 17 00:00:00 2001
From: kishorenc
Date: Wed, 25 Sep 2019 20:41:58 +0530
Subject: [PATCH] Fixed a bug with unicode search.

---
 src/art.cpp       |  7 ++++---
 test/art_test.cpp | 27 +++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/src/art.cpp b/src/art.cpp
index 541a3c99..a500817b 100644
--- a/src/art.cpp
+++ b/src/art.cpp
@@ -32,7 +32,7 @@
 
 enum recurse_progress { RECURSE, ABORT, ITERATE };
 
-static void art_fuzzy_recurse(char p, char c, const art_node *n, int depth, const unsigned char *term,
+static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *n, int depth, const unsigned char *term,
                               const int term_len, const int* irow, const int* jrow, const int min_cost,
                               const int max_cost, const bool prefix, std::vector &results);
 
@@ -1144,7 +1144,8 @@ static inline void copyIntArray2(const int *src, int *dest, const int len) {
     }
 }
 
-static inline int levenshtein_dist(const int depth, const char p, const char c, const unsigned char* term, const int term_len,
+static inline int levenshtein_dist(const int depth, const unsigned char p, const unsigned char c,
+                                   const unsigned char* term, const int term_len,
                                    const int* irow, const int* jrow, int* krow) {
     int row_min = std::numeric_limits::max();
     const int columns = term_len+1;
@@ -1234,7 +1235,7 @@ static inline void rotate(int &i, int &j, int &k) {
 
 // e.g. catapult against coratapult
 // e.g. microafot against microsoft
-static void art_fuzzy_recurse(char p, char c, const art_node *n, int depth, const unsigned char *term,
+static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *n, int depth, const unsigned char *term,
                               const int term_len, const int* irow, const int* jrow, const int min_cost,
                               const int max_cost, const bool prefix, std::vector &results) {
     const int columns = term_len+1;
diff --git a/test/art_test.cpp b/test/art_test.cpp
index 46781fa0..ca690fb9 100644
--- a/test/art_test.cpp
+++ b/test/art_test.cpp
@@ -723,6 +723,33 @@ TEST(ArtTest, test_art_fuzzy_search) {
     ASSERT_TRUE(res == 0);
 }
 
+TEST(ArtTest, test_art_fuzzy_search_unicode_chars) {
+    art_tree t;
+    int res = art_tree_init(&t);
+    ASSERT_TRUE(res == 0);
+
+    std::vector keys = {
+        "роман", "обладать", "роисхождения", "без", "பஞ்சமம்", "சுதந்திரமாகவே", "அல்லது", "அடிப்படையில்"
+    };
+
+    for(const char* key: keys) {
+        art_document doc = get_document((uint32_t) 1);
+        ASSERT_TRUE(NULL == art_insert(&t, (unsigned char*)key, strlen(key)+1, &doc, 1));
+    }
+
+    for(const char* key: keys) {
+        art_leaf* l = (art_leaf *) art_search(&t, (const unsigned char *)key, strlen(key)+1);
+        EXPECT_EQ(1, l->values->ids.at(0));
+
+        std::vector leaves;
+        art_fuzzy_search(&t, (unsigned char *)key, strlen(key), 0, 1, 10, FREQUENCY, true, leaves);
+        ASSERT_EQ(1, leaves.size());
+    }
+
+    res = art_tree_destroy(&t);
+    ASSERT_TRUE(res == 0);
+}
+
 TEST(ArtTest, test_encode_int32) {
     unsigned char chars[8];