Fixed a bug with Unicode search.

This commit is contained in:
kishorenc 2019-09-25 20:41:58 +05:30
parent 9cc900ad51
commit 7c4d0f9286
2 changed files with 31 additions and 3 deletions

View File

@ -32,7 +32,7 @@
// Progress signal type declared ahead of art_fuzzy_recurse.
// NOTE(review): presumably RECURSE = descend further, ABORT = stop the search,
// ITERATE = move on to the next candidate — confirm against the code that uses it.
enum recurse_progress { RECURSE, ABORT, ITERATE };
static void art_fuzzy_recurse(char p, char c, const art_node *n, int depth, const unsigned char *term,
static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *n, int depth, const unsigned char *term,
const int term_len, const int* irow, const int* jrow, const int min_cost,
const int max_cost, const bool prefix, std::vector<const art_node *> &results);
@ -1144,7 +1144,8 @@ static inline void copyIntArray2(const int *src, int *dest, const int len) {
}
}
static inline int levenshtein_dist(const int depth, const char p, const char c, const unsigned char* term, const int term_len,
static inline int levenshtein_dist(const int depth, const unsigned char p, const unsigned char c,
const unsigned char* term, const int term_len,
const int* irow, const int* jrow, int* krow) {
int row_min = std::numeric_limits<int>::max();
const int columns = term_len+1;
@ -1234,7 +1235,7 @@ static inline void rotate(int &i, int &j, int &k) {
// e.g. catapult against coratapult
// e.g. microafot against microsoft
static void art_fuzzy_recurse(char p, char c, const art_node *n, int depth, const unsigned char *term,
static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *n, int depth, const unsigned char *term,
const int term_len, const int* irow, const int* jrow, const int min_cost,
const int max_cost, const bool prefix, std::vector<const art_node *> &results) {
const int columns = term_len+1;

View File

@ -723,6 +723,33 @@ TEST(ArtTest, test_art_fuzzy_search) {
ASSERT_TRUE(res == 0);
}
// Regression test for fuzzy search over non-ASCII keys: every inserted key must be
// retrievable both by exact lookup and by a fuzzy prefix search for the key itself.
TEST(ArtTest, test_art_fuzzy_search_unicode_chars) {
    art_tree t;
    int res = art_tree_init(&t);
    ASSERT_TRUE(res == 0);

    // Cyrillic and Tamil words, i.e. keys whose bytes are not plain 7-bit ASCII.
    std::vector<const char*> keys = {
        "роман", "обладать", "роисхождения", "без", "பஞ்சமம்", "சுதந்திரமாகவே", "அல்லது", "அடிப்படையில்"
    };

    // Keys are inserted with their trailing NUL (strlen + 1); each insert must report
    // that no previous value existed for the key.
    for(const char* key: keys) {
        art_document doc = get_document((uint32_t) 1);
        ASSERT_TRUE(NULL == art_insert(&t, (unsigned char*)key, strlen(key)+1, &doc, 1)) << "key: " << key;
    }

    for(const char* key: keys) {
        art_leaf* l = (art_leaf *) art_search(&t, (const unsigned char *)key, strlen(key)+1);
        // Guard the dereference below: a failed lookup should fail the test, not crash the binary.
        ASSERT_TRUE(l != nullptr) << "exact search found nothing for key: " << key;
        EXPECT_EQ(1, l->values->ids.at(0)) << "key: " << key;

        // The term length excludes the trailing NUL here, unlike the exact insert/search above.
        // NOTE(review): the positional arguments are presumably min_cost=0, max_cost=1,
        // max_words=10 and prefix=true — confirm against art_fuzzy_search's declaration.
        std::vector<art_leaf*> leaves;
        art_fuzzy_search(&t, (unsigned char *)key, strlen(key), 0, 1, 10, FREQUENCY, true, leaves);
        ASSERT_EQ(1u, leaves.size()) << "key: " << key;
    }

    res = art_tree_destroy(&t);
    ASSERT_TRUE(res == 0);
}
TEST(ArtTest, test_encode_int32) {
unsigned char chars[8];