mirror of
https://github.com/typesense/typesense.git
synced 2025-05-16 11:28:44 +08:00
Fixed a bug with unicode search.
This commit is contained in:
parent
9cc900ad51
commit
7c4d0f9286
@ -32,7 +32,7 @@
|
||||
|
||||
enum recurse_progress { RECURSE, ABORT, ITERATE };
|
||||
|
||||
static void art_fuzzy_recurse(char p, char c, const art_node *n, int depth, const unsigned char *term,
|
||||
static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *n, int depth, const unsigned char *term,
|
||||
const int term_len, const int* irow, const int* jrow, const int min_cost,
|
||||
const int max_cost, const bool prefix, std::vector<const art_node *> &results);
|
||||
|
||||
@ -1144,7 +1144,8 @@ static inline void copyIntArray2(const int *src, int *dest, const int len) {
|
||||
}
|
||||
}
|
||||
|
||||
static inline int levenshtein_dist(const int depth, const char p, const char c, const unsigned char* term, const int term_len,
|
||||
static inline int levenshtein_dist(const int depth, const unsigned char p, const unsigned char c,
|
||||
const unsigned char* term, const int term_len,
|
||||
const int* irow, const int* jrow, int* krow) {
|
||||
int row_min = std::numeric_limits<int>::max();
|
||||
const int columns = term_len+1;
|
||||
@ -1234,7 +1235,7 @@ static inline void rotate(int &i, int &j, int &k) {
|
||||
|
||||
// e.g. catapult against coratapult
|
||||
// e.g. microafot against microsoft
|
||||
static void art_fuzzy_recurse(char p, char c, const art_node *n, int depth, const unsigned char *term,
|
||||
static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *n, int depth, const unsigned char *term,
|
||||
const int term_len, const int* irow, const int* jrow, const int min_cost,
|
||||
const int max_cost, const bool prefix, std::vector<const art_node *> &results) {
|
||||
const int columns = term_len+1;
|
||||
|
@ -723,6 +723,33 @@ TEST(ArtTest, test_art_fuzzy_search) {
|
||||
ASSERT_TRUE(res == 0);
|
||||
}
|
||||
|
||||
// Regression test for searching keys made of multi-byte (non-ASCII) characters.
// Inserts a set of Cyrillic and Tamil words into a fresh tree, then checks that
// each one can be found by an exact lookup and yields exactly one leaf from
// art_fuzzy_search.
TEST(ArtTest, test_art_fuzzy_search_unicode_chars) {
    art_tree t;
    int res = art_tree_init(&t);
    ASSERT_TRUE(res == 0);

    std::vector<const char*> keys = {
        "роман", "обладать", "роисхождения", "без", "பஞ்சமம்", "சுதந்திரமாகவே", "அல்லது", "அடிப்படையில்"
    };

    // Keys are inserted with their terminating NUL byte (strlen + 1).
    // art_insert is expected to return NULL here, since every key is new to the tree.
    for(const char* key: keys) {
        art_document doc = get_document((uint32_t) 1);
        ASSERT_TRUE(NULL == art_insert(&t, (unsigned char*)key, strlen(key)+1, &doc, 1));
    }

    for(const char* key: keys) {
        // Exact lookup uses the same NUL-inclusive length that was used on insert.
        art_leaf* l = (art_leaf *) art_search(&t, (const unsigned char *)key, strlen(key)+1);

        // Abort this test with a failure instead of dereferencing a null leaf
        // when the lookup misses.
        ASSERT_TRUE(l != NULL);
        EXPECT_EQ(1, l->values->ids.at(0));

        // Fuzzy search is given the key length without the terminating NUL.
        // NOTE(review): the 0 and 1 arguments look like the min/max edit cost - confirm against
        // the art_fuzzy_search declaration.
        std::vector<art_leaf*> leaves;
        art_fuzzy_search(&t, (unsigned char *)key, strlen(key), 0, 1, 10, FREQUENCY, true, leaves);
        ASSERT_EQ(1, leaves.size());
    }

    res = art_tree_destroy(&t);
    ASSERT_TRUE(res == 0);
}
|
||||
|
||||
TEST(ArtTest, test_encode_int32) {
|
||||
unsigned char chars[8];
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user