mirror of
https://github.com/typesense/typesense.git
synced 2025-05-17 20:22:32 +08:00
Support signed ints in art int search.
This commit is contained in:
parent
85d64608fd
commit
0fcdb6b479
@ -242,9 +242,9 @@ int art_fuzzy_search(art_tree *t, const unsigned char *term, const int term_len,
|
||||
static int art_topk_iter(const art_node *root, token_ordering token_order, const int max_results,
|
||||
std::vector<art_leaf *> &results);
|
||||
|
||||
void encode_int32(uint32_t n, unsigned char *chars);
|
||||
void encode_int32(int32_t n, unsigned char *chars);
|
||||
|
||||
int art_int32_search(art_tree *t, uint32_t value, int compare, std::vector<const art_leaf *> &results);
|
||||
int art_int32_search(art_tree *t, int32_t value, int compare, std::vector<const art_leaf *> &results);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
35
src/art.cpp
35
src/art.cpp
@ -28,14 +28,14 @@
|
||||
|
||||
#define microseconds std::chrono::duration_cast<std::chrono::microseconds>
|
||||
|
||||
enum recurse_progress { CONTINUE, ABORT, ITERATE };
|
||||
enum recurse_progress { RECURSE, ABORT, ITERATE };
|
||||
|
||||
static void art_fuzzy_recurse(char p, char c, const art_node *n, int depth, const unsigned char *term,
|
||||
const int term_len, const int* irow, const int* jrow, const int min_cost,
|
||||
const int max_cost, const bool prefix, std::vector<const art_node *> &results);
|
||||
|
||||
void art_int_fuzzy_recurse(art_node *n, int depth, unsigned char* int_str, int int_str_len,
|
||||
uint32_t compare, std::vector<const art_leaf *> &results);
|
||||
int32_t compare, std::vector<const art_leaf *> &results);
|
||||
|
||||
bool compare_art_leaf_frequency(const art_leaf *a, const art_leaf *b) {
|
||||
return a->values->ids.getLength() > b->values->ids.getLength();
|
||||
@ -1338,7 +1338,7 @@ int art_fuzzy_search(art_tree *t, const unsigned char *term, const int term_len,
|
||||
}
|
||||
|
||||
long long int time_micro = microseconds(std::chrono::high_resolution_clock::now() - begin).count();
|
||||
std::cout << "Time taken for fuzz: " << time_micro << "us, size of nodes: " << nodes.size() << std::endl;
|
||||
//!std::cout << "Time taken for fuzz: " << time_micro << "us, size of nodes: " << nodes.size() << std::endl;
|
||||
|
||||
begin = std::chrono::high_resolution_clock::now();
|
||||
|
||||
@ -1353,11 +1353,11 @@ int art_fuzzy_search(art_tree *t, const unsigned char *term, const int term_len,
|
||||
}
|
||||
|
||||
time_micro = microseconds(std::chrono::high_resolution_clock::now() - begin).count();
|
||||
std::cout << "Time taken for art_topk_iter: " << time_micro << "us" << std::endl;
|
||||
//!std::cout << "Time taken for art_topk_iter: " << time_micro << "us" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void encode_int32(uint32_t n, unsigned char *chars) {
|
||||
void encode_int32(int32_t n, unsigned char *chars) {
|
||||
unsigned char symbols[16] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
};
|
||||
@ -1380,16 +1380,18 @@ void encode_int32(uint32_t n, unsigned char *chars) {
|
||||
chars[8] = 46;
|
||||
}
|
||||
|
||||
// Implements ==, <= and >=
|
||||
recurse_progress matches(char a, char b, int compare) {
|
||||
switch(compare) {
|
||||
case -1:
|
||||
if (a == b) return CONTINUE;
|
||||
if (a == b) return RECURSE;
|
||||
else if(a < b) return ITERATE;
|
||||
return ABORT;
|
||||
case 0:
|
||||
if(a == b) return CONTINUE;
|
||||
if(a == b) return RECURSE;
|
||||
return ABORT;
|
||||
case 1:
|
||||
if (a == b) return CONTINUE;
|
||||
if (a == b) return RECURSE;
|
||||
else if(a > b) return ITERATE;
|
||||
return ABORT;
|
||||
default:
|
||||
@ -1444,7 +1446,7 @@ static void art_iter(const art_node *n, std::vector<const art_leaf *> &results)
|
||||
}
|
||||
|
||||
static inline void art_int_fuzzy_children(const art_node *n, int depth, unsigned char* int_str, int int_str_len,
|
||||
uint32_t compare, std::vector<const art_leaf *> &results) {
|
||||
int32_t compare, std::vector<const art_leaf *> &results) {
|
||||
char child_char;
|
||||
art_node* child;
|
||||
|
||||
@ -1456,7 +1458,7 @@ static inline void art_int_fuzzy_children(const art_node *n, int depth, unsigned
|
||||
printf("\n4!child_char: %c, %d, depth: %d", child_char, child_char, depth);
|
||||
child = ((art_node4*)n)->children[i];
|
||||
recurse_progress progress = matches(child_char, int_str[depth], compare);
|
||||
if(progress == CONTINUE) {
|
||||
if(progress == RECURSE) {
|
||||
art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, compare, results);
|
||||
} else if(progress == ITERATE) {
|
||||
art_iter(child, results);
|
||||
@ -1470,7 +1472,7 @@ static inline void art_int_fuzzy_children(const art_node *n, int depth, unsigned
|
||||
printf("\n16!child_char: %c, depth: %d", child_char, depth);
|
||||
child = ((art_node16*)n)->children[i];
|
||||
recurse_progress progress = matches(child_char, int_str[depth], compare);
|
||||
if(progress == CONTINUE) {
|
||||
if(progress == RECURSE) {
|
||||
art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, compare, results);
|
||||
} else if(progress == ITERATE) {
|
||||
art_iter(child, results);
|
||||
@ -1486,7 +1488,7 @@ static inline void art_int_fuzzy_children(const art_node *n, int depth, unsigned
|
||||
child_char = (char)i;
|
||||
printf("\n48!child_char: %c, depth: %d, ix: %d", child_char, depth, ix);
|
||||
recurse_progress progress = matches(child_char, int_str[depth], compare);
|
||||
if(progress == CONTINUE) {
|
||||
if(progress == RECURSE) {
|
||||
art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, compare, results);
|
||||
} else if(progress == ITERATE) {
|
||||
art_iter(child, results);
|
||||
@ -1501,7 +1503,7 @@ static inline void art_int_fuzzy_children(const art_node *n, int depth, unsigned
|
||||
printf("\n256!child_char: %c, depth: %d", child_char, depth);
|
||||
child = ((art_node256*)n)->children[i];
|
||||
recurse_progress progress = matches(child_char, int_str[depth], compare);
|
||||
if(progress == CONTINUE) {
|
||||
if(progress == RECURSE) {
|
||||
art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, compare, results);
|
||||
} else if(progress == ITERATE) {
|
||||
art_iter(child, results);
|
||||
@ -1514,13 +1516,12 @@ static inline void art_int_fuzzy_children(const art_node *n, int depth, unsigned
|
||||
}
|
||||
|
||||
void art_int_fuzzy_recurse(art_node *n, int depth, unsigned char* int_str, int int_str_len,
|
||||
uint32_t compare, std::vector<const art_leaf*> &results) {
|
||||
int32_t compare, std::vector<const art_leaf*> &results) {
|
||||
if (!n) return ;
|
||||
|
||||
if(IS_LEAF(n)) {
|
||||
art_leaf *l = (art_leaf *) LEAF_RAW(n);
|
||||
const int end_index = min(l->key_len, int_str_len);
|
||||
while(depth < end_index) {
|
||||
while(depth < int_str_len) {
|
||||
char c = l->key[depth];
|
||||
recurse_progress progress = matches(c, int_str[depth], compare);
|
||||
if(progress == ABORT) {
|
||||
@ -1559,7 +1560,7 @@ void art_int_fuzzy_recurse(art_node *n, int depth, unsigned char* int_str, int i
|
||||
art_int_fuzzy_children(n, depth, int_str, int_str_len, compare, results);
|
||||
}
|
||||
|
||||
int art_int32_search(art_tree *t, uint32_t value, int compare, std::vector<const art_leaf *> &results) {
|
||||
int art_int32_search(art_tree *t, int32_t value, int compare, std::vector<const art_leaf *> &results) {
|
||||
unsigned char chars[9];
|
||||
encode_int32(value, chars);
|
||||
art_int_fuzzy_recurse(t->root, 0, chars, 9, compare, results);
|
||||
|
@ -215,8 +215,8 @@ std::vector<nlohmann::json> Collection::search(std::string query, const std::vec
|
||||
}
|
||||
|
||||
long long int timeMillis = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - begin).count();
|
||||
std::cout << "Time taken for result calc: " << timeMillis << "us" << std::endl;
|
||||
store->print_memory_usage();
|
||||
//!std::cout << "Time taken for result calc: " << timeMillis << "us" << std::endl;
|
||||
//!store->print_memory_usage();
|
||||
return results;
|
||||
}
|
||||
|
||||
@ -299,7 +299,7 @@ void Collection::search(std::string & query, const std::string & field, const in
|
||||
}
|
||||
|
||||
if(!leaves.empty()) {
|
||||
log_leaves(costs[token_index], token, leaves);
|
||||
//!log_leaves(costs[token_index], token, leaves);
|
||||
token_leaves.push_back(leaves);
|
||||
token_to_count[token] = leaves.at(0)->values->ids.getLength();
|
||||
} else {
|
||||
|
@ -759,6 +759,41 @@ TEST(ArtTest, test_int_range_hundreds) {
|
||||
ASSERT_TRUE(res == 0);
|
||||
}
|
||||
|
||||
// Checks art_int32_search against a tree holding only negative int32 keys.
// Keys -100..-1 are inserted via encode_int32, then the search is run with
// compare values 0, 1 and -1, asserting the number of leaves returned each time.
TEST(ArtTest, test_int_negative_ints) {
|
||||
art_tree t;
|
||||
art_tree_init(&t);
|
||||
|
||||
art_document doc = get_document(1);
|
||||
// Buffer length handed to art_insert as the key length for every encoded integer.
const int CHAR_LEN = 9;
|
||||
unsigned char chars[CHAR_LEN];
|
||||
|
||||
// Insert the 100 keys -100, -99, ..., -1.
for(int32_t i = -100; i < 0; i++) {
|
||||
encode_int32(i, chars);
|
||||
// NOTE(review): a NULL return presumably means the key was not already present — confirm against art_insert.
ASSERT_TRUE(NULL == art_insert(&t, (unsigned char*)chars, CHAR_LEN, &doc, 1));
|
||||
}
|
||||
|
||||
// `chars` is not read again in this test; the searches below pass the integer value directly.
encode_int32(-99, chars);
|
||||
|
||||
std::vector<const art_leaf*> results;
|
||||
|
||||
// compare == 0: exact match, so only the key -99 is expected.
int res = art_int32_search(&t, -99, 0, results);
|
||||
ASSERT_TRUE(res == 0);
|
||||
ASSERT_EQ(1, results.size());
|
||||
results.clear();
|
||||
|
||||
// compare == 1: keys >= -90, i.e. -90..-1, so 90 leaves are expected.
res = art_int32_search(&t, -90, 1, results);
|
||||
ASSERT_TRUE(res == 0);
|
||||
ASSERT_EQ(90, results.size());
|
||||
results.clear();
|
||||
|
||||
// compare == -1: keys <= -99, i.e. -100 and -99, so 2 leaves are expected.
res = art_int32_search(&t, -99, -1, results);
|
||||
ASSERT_TRUE(res == 0);
|
||||
ASSERT_EQ(2, results.size());
|
||||
|
||||
res = art_tree_destroy(&t);
|
||||
ASSERT_TRUE(res == 0);
|
||||
}
|
||||
|
||||
TEST(ArtTest, test_int_range_millions) {
|
||||
art_tree t;
|
||||
art_tree_init(&t);
|
||||
|
Loading…
x
Reference in New Issue
Block a user