diff --git a/include/art.h b/include/art.h index 54e82fbd..cab6dec6 100644 --- a/include/art.h +++ b/include/art.h @@ -127,6 +127,14 @@ enum token_ordering { MAX_SCORE }; +enum NUM_COMPARATOR { + LESS_THAN, + LESS_THAN_EQUALS, + EQUALS, + GREATER_THAN, + GREATER_THAN_EQUALS +}; + /** * Initializes an ART tree * @return 0 on success. @@ -244,7 +252,11 @@ static int art_topk_iter(const art_node *root, token_ordering token_order, const void encode_int32(int32_t n, unsigned char *chars); -int art_int32_search(art_tree *t, int32_t value, int compare, std::vector &results); +void encode_int64(int64_t n, unsigned char *chars); + +int art_int32_search(art_tree *t, int32_t value, NUM_COMPARATOR comparator, std::vector &results); + +int art_int64_search(art_tree *t, int64_t value, NUM_COMPARATOR comparator, std::vector &results); #ifdef __cplusplus } diff --git a/src/art.cpp b/src/art.cpp index 86735eaf..fa3f012c 100644 --- a/src/art.cpp +++ b/src/art.cpp @@ -35,7 +35,7 @@ static void art_fuzzy_recurse(char p, char c, const art_node *n, int depth, cons const int max_cost, const bool prefix, std::vector &results); void art_int_fuzzy_recurse(art_node *n, int depth, const unsigned char* int_str, int int_str_len, - int32_t compare, std::vector &results); + NUM_COMPARATOR comparator, std::vector &results); bool compare_art_leaf_frequency(const art_leaf *a, const art_leaf *b) { return a->values->ids.getLength() > b->values->ids.getLength(); @@ -205,6 +205,10 @@ int art_tree_destroy(art_tree *t) { #ifndef BROKEN_GCC_C99_INLINE extern inline uint64_t art_size(art_tree *t); + +void compare_and_match_leaf(const unsigned char *int_str, int int_str_len, const NUM_COMPARATOR &comparator, + std::vector &results, const art_leaf *l); + #endif static art_node** find_child(art_node *n, unsigned char c) { @@ -1375,17 +1379,32 @@ void encode_int32(int32_t n, unsigned char *chars) { } } +void encode_int64(int64_t n, unsigned char *chars) { + union { + int64_t l; + unsigned char bytes[8]; + } container; + + container.l = n; + + for(uint32_t i = 0; i < 8; i++) { + chars[7-i] = container.bytes[i]; + } +} + // Implements ==, <= and >= -recurse_progress matches(unsigned char a, unsigned char b, int compare) { - switch(compare) { - case -1: +recurse_progress matches(unsigned char a, unsigned char b, NUM_COMPARATOR comparator) { + switch(comparator) { + case LESS_THAN: + case LESS_THAN_EQUALS: if (a == b) return RECURSE; else if(a < b) return ITERATE; return ABORT; - case 0: + case EQUALS: if(a == b) return RECURSE; return ABORT; - case 1: + case GREATER_THAN: + case GREATER_THAN_EQUALS: if (a == b) return RECURSE; else if(a > b) return ITERATE; return ABORT; @@ -1395,12 +1414,13 @@ recurse_progress matches(unsigned char a, unsigned char b, int compare) { } -static void art_iter(const art_node *n, std::vector &results) { +static void art_iter(const art_node *n, const unsigned char* int_str, int int_str_len, NUM_COMPARATOR comparator, + std::vector &results) { // Handle base cases if (!n) return ; if (IS_LEAF(n)) { art_leaf *l = (art_leaf *) LEAF_RAW(n); - results.push_back(l); + compare_and_match_leaf(int_str, int_str_len, comparator, results, l); return ; } @@ -1408,13 +1428,13 @@ static void art_iter(const art_node *n, std::vector &results) switch (n->type) { case NODE4: for (int i=0; i < n->num_children; i++) { - art_iter(((art_node4 *) n)->children[i], results); + art_iter(((art_node4 *) n)->children[i], int_str, int_str_len, comparator, results); } break; case NODE16: for (int i=0; i < n->num_children; i++) { - art_iter(((art_node16 *) n)->children[i], results); + art_iter(((art_node16 *) n)->children[i], int_str, int_str_len, comparator, results); } break; @@ -1422,14 +1442,14 @@ static void art_iter(const art_node *n, std::vector &results) for (int i=0; i < 256; i++) { idx = ((art_node48*)n)->keys[i]; if (!idx) continue; - art_iter(((art_node48 *) n)->children[idx - 1], results); + art_iter(((art_node48 *) n)->children[idx - 1], int_str, int_str_len, comparator, results); } break; case NODE256: for (int i=0; i < 256; i++) { if (!((art_node256*)n)->children[i]) continue; - art_iter(((art_node256 *) n)->children[i], results); + art_iter(((art_node256 *) n)->children[i], int_str, int_str_len, comparator, results); } break; @@ -1441,8 +1461,8 @@ static void art_iter(const art_node *n, std::vector &results) } static inline void art_int_fuzzy_children(const art_node *n, int depth, const unsigned char* int_str, int int_str_len, - int32_t compare, std::vector &results) { - char child_char; + NUM_COMPARATOR comparator, std::vector &results) { + unsigned char child_char; art_node* child; switch (n->type) { @@ -1452,11 +1472,11 @@ static inline void art_int_fuzzy_children(const art_node *n, int depth, const un child_char = ((art_node4*)n)->keys[i]; printf("\n4!child_char: %c, %d, depth: %d", child_char, child_char, depth); child = ((art_node4*)n)->children[i]; - recurse_progress progress = matches(child_char, int_str[depth], compare); + recurse_progress progress = matches(child_char, int_str[depth], comparator); if(progress == RECURSE) { - art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, compare, results); + art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, comparator, results); } else if(progress == ITERATE) { - art_iter(child, results); + art_iter(child, int_str, int_str_len, comparator, results); } } break; @@ -1466,11 +1486,11 @@ static inline void art_int_fuzzy_children(const art_node *n, int depth, const un child_char = ((art_node16*)n)->keys[i]; printf("\n16!child_char: %c, depth: %d", child_char, depth); child = ((art_node16*)n)->children[i]; - recurse_progress progress = matches(child_char, int_str[depth], compare); + recurse_progress progress = matches(child_char, int_str[depth], comparator); if(progress == RECURSE) { - art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, compare, results); + art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, comparator, results); } else if(progress == ITERATE) { - art_iter(child, results); + art_iter(child, int_str, int_str_len, comparator, results); } } break; @@ -1480,13 +1500,13 @@ static inline void art_int_fuzzy_children(const art_node *n, int depth, const un int ix = ((art_node48*)n)->keys[i]; if (!ix) continue; child = ((art_node48*)n)->children[ix - 1]; - child_char = (char)i; + child_char = (unsigned char)i; printf("\n48!child_char: %c, depth: %d, ix: %d", child_char, depth, ix); - recurse_progress progress = matches(child_char, int_str[depth], compare); + recurse_progress progress = matches(child_char, int_str[depth], comparator); if(progress == RECURSE) { - art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, compare, results); + art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, comparator, results); } else if(progress == ITERATE) { - art_iter(child, results); + art_iter(child, int_str, int_str_len, comparator, results); } } break; @@ -1494,14 +1514,14 @@ static inline void art_int_fuzzy_children(const art_node *n, int depth, const un printf("\nNODE256\n"); for (int i=255; i >= 0; i--) { if (!((art_node256*)n)->children[i]) continue; - child_char = (char) i; + child_char = (unsigned char) i; printf("\n256!child_char: %c, depth: %d", child_char, depth); child = ((art_node256*)n)->children[i]; - recurse_progress progress = matches(child_char, int_str[depth], compare); + recurse_progress progress = matches(child_char, int_str[depth], comparator); if(progress == RECURSE) { - art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, compare, results); + art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, comparator, results); } else if(progress == ITERATE) { - art_iter(child, results); + art_iter(child, int_str, int_str_len, comparator, results); } } break; @@ -1511,14 +1531,14 @@ static inline void art_int_fuzzy_children(const art_node *n, int depth, const un } void art_int_fuzzy_recurse(art_node *n, int depth, const unsigned char* int_str, int int_str_len, - int32_t compare, std::vector &results) { + NUM_COMPARATOR comparator, std::vector &results) { if (!n) return ; if(IS_LEAF(n)) { art_leaf *l = (art_leaf *) LEAF_RAW(n); while(depth < int_str_len) { - char c = l->key[depth]; - recurse_progress progress = matches(c, int_str[depth], compare); + unsigned char c = l->key[depth]; + recurse_progress progress = matches(c, int_str[depth], comparator); if(progress == ABORT) { return; } @@ -1530,7 +1550,7 @@ void art_int_fuzzy_recurse(art_node *n, int depth, const unsigned char* int_str, depth++; } - results.push_back(l); + compare_and_match_leaf(int_str, int_str_len, comparator, results, l); return ; } @@ -1540,24 +1560,45 @@ void art_int_fuzzy_recurse(art_node *n, int depth, const unsigned char* int_str, printf("\npartial_len: %d", partial_len); for(int idx=0; idxpartial[idx]; - recurse_progress progress = matches(c, int_str[depth+idx], compare); + unsigned char c = n->partial[idx]; + recurse_progress progress = matches(c, int_str[depth+idx], comparator); if(progress == ABORT) { return; } if(progress == ITERATE) { - return art_iter(n, results); + return art_iter(n, int_str, int_str_len, comparator, results); } } depth += n->partial_len; - art_int_fuzzy_children(n, depth, int_str, int_str_len, compare, results); + art_int_fuzzy_children(n, depth, int_str, int_str_len, comparator, results); } -int art_int32_search(art_tree *t, int32_t value, int compare, std::vector &results) { +void compare_and_match_leaf(const unsigned char *int_str, int int_str_len, const NUM_COMPARATOR &comparator, + std::vector &results, const art_leaf *l) { + if(comparator == LESS_THAN || comparator == GREATER_THAN) { + for(auto i = 0; i < l->key_len; i++) { + if(int_str[i] != l->key[i]) { + results.push_back(l); + return ; + } + } + } else { + results.push_back(l); + } +} + +int art_int32_search(art_tree *t, int32_t value, NUM_COMPARATOR comparator, std::vector &results) { unsigned char chars[8]; encode_int32(value, chars); - art_int_fuzzy_recurse(t->root, 0, chars, 8, compare, results); + art_int_fuzzy_recurse(t->root, 0, chars, 8, comparator, results); return 0; } + +int art_int64_search(art_tree *t, int64_t value, NUM_COMPARATOR comparator, std::vector &results) { + unsigned char chars[8]; + encode_int64(value, chars); + art_int_fuzzy_recurse(t->root, 0, chars, 8, comparator, results); + return 0; +} \ No newline at end of file diff --git a/test/art_test.cpp b/test/art_test.cpp index 432e0a8f..40e023e4 100644 --- a/test/art_test.cpp +++ b/test/art_test.cpp @@ -685,46 +685,52 @@ TEST(ArtTest, test_art_fuzzy_search) { ASSERT_TRUE(res == 0); } -TEST(ArtTest, test_encode_int) { +TEST(ArtTest, test_encode_int32) { unsigned char chars[8]; - // 175 => 0000,0000,0000,0000,0000,0000,1010,1111,\0 + // 175 => 0000,0000,0000,0000,0000,0000,1010,1111 unsigned char chars_175[8] = {0, 0, 0, 0, 0, 0, 10, 15}; encode_int32(175, chars); for(uint32_t i = 0; i < 8; i++) { ASSERT_EQ(chars_175[i], chars[i]); } - // 0 => 0000,0000,0000,0000,0000,0000,0000,0000,\0 + // 0 => 0000,0000,0000,0000,0000,0000,0000,0000 unsigned char chars_0[8] = {0, 0, 0, 0, 0, 0, 0, 0}; encode_int32(0, chars); for(uint32_t i = 0; i < 8; i++) { ASSERT_EQ(chars_0[i], chars[i]); } - // 255 => 0000,0000,0000,0000,0000,0000,1111,1111,\0 + // 255 => 0000,0000,0000,0000,0000,0000,1111,1111 unsigned char chars_255[8] = {0, 0, 0, 0, 0, 0, 15, 15}; encode_int32(255, chars); for(uint32_t i = 0; i < 8; i++) { ASSERT_EQ(chars_255[i], chars[i]); } - // 4531 => 0000,0000,0000,0000,0001,0001,1011,0011,\0 + // 4531 => 0000,0000,0000,0000,0001,0001,1011,0011 unsigned char chars_4531[8] = {0, 0, 0, 0, 1, 1, 11, 3}; encode_int32(4531, chars); for(uint32_t i = 0; i < 8; i++) { ASSERT_EQ(chars_4531[i], chars[i]); } - // 1200000 => 0000,0000,0001,0010,0100,1111,1000,0000,\0 + // 1200000 => 0000,0000,0001,0010,0100,1111,1000,0000 unsigned char chars_1M[8] = {0, 0, 1, 2, 4, 15, 8, 0}; encode_int32(1200000, chars); for(uint32_t i = 0; i < 8; i++) { ASSERT_EQ(chars_1M[i], chars[i]); } + + unsigned char chars_neg_4531[8] = {15, 15, 15, 15, 14, 14, 4, 13}; + encode_int32(-4531, chars); + for(uint32_t i = 0; i < 8; i++) { + ASSERT_EQ(chars_neg_4531[i], chars[i]); + } } -TEST(ArtTest, test_int_range_hundreds) { +TEST(ArtTest, test_int32_range_hundreds) { art_tree t; art_tree_init(&t); @@ -741,25 +747,36 @@ TEST(ArtTest, test_int_range_hundreds) { std::vector results; - int res = art_int32_search(&t, 106, 0, results); + int res = art_int32_search(&t, 106, EQUALS, results); ASSERT_TRUE(res == 0); ASSERT_EQ(1, results.size()); results.clear(); - res = art_int32_search(&t, 106, 1, results); + res = art_int32_search(&t, 106, GREATER_THAN_EQUALS, results); ASSERT_TRUE(res == 0); ASSERT_EQ(4, results.size()); results.clear(); - res = art_int32_search(&t, 106, -1, results); + res = art_int32_search(&t, 106, GREATER_THAN, results); + ASSERT_TRUE(res == 0); + ASSERT_EQ(3, results.size()); + results.clear(); + + res = art_int32_search(&t, 106, LESS_THAN_EQUALS, results); ASSERT_TRUE(res == 0); ASSERT_EQ(7, results.size()); + results.clear(); + + res = art_int32_search(&t, 106, LESS_THAN, results); + ASSERT_TRUE(res == 0); + + ASSERT_EQ(6, results.size()); res = art_tree_destroy(&t); ASSERT_TRUE(res == 0); } -TEST(ArtTest, test_int_negative_ints) { +TEST(ArtTest, test_int32_negative) { art_tree t; art_tree_init(&t); @@ -776,25 +793,41 @@ TEST(ArtTest, test_int_negative_ints) { std::vector results; - int res = art_int32_search(&t, -99, 0, results); + int res = art_int32_search(&t, -99, EQUALS, results); ASSERT_TRUE(res == 0); ASSERT_EQ(1, results.size()); results.clear(); - res = art_int32_search(&t, -90, 1, results); + res = art_int32_search(&t, -90, GREATER_THAN_EQUALS, results); ASSERT_TRUE(res == 0); ASSERT_EQ(90, results.size()); results.clear(); - res = art_int32_search(&t, -99, -1, results); + res = art_int32_search(&t, -90, GREATER_THAN, results); + ASSERT_TRUE(res == 0); + ASSERT_EQ(89, results.size()); + results.clear(); + + res = art_int32_search(&t, -99, LESS_THAN_EQUALS, results); ASSERT_TRUE(res == 0); ASSERT_EQ(2, results.size()); + results.clear(); + + res = art_int32_search(&t, -99, LESS_THAN, results); + ASSERT_TRUE(res == 0); + ASSERT_EQ(1, results.size()); + results.clear(); + + res = art_int32_search(&t, -100, LESS_THAN_EQUALS, results); + ASSERT_TRUE(res == 0); + ASSERT_EQ(1, results.size()); + results.clear(); res = art_tree_destroy(&t); ASSERT_TRUE(res == 0); } -TEST(ArtTest, test_int_range_millions) { +TEST(ArtTest, test_int32_million) { art_tree t; art_tree_init(&t); @@ -820,51 +853,63 @@ TEST(ArtTest, test_int_range_millions) { // == for(uint32_t i = 0; i < 6; i++) { results.clear(); - art_int32_search(&t, (uint32_t) pow(10, i), 0, results); + art_int32_search(&t, (uint32_t) pow(10, i), EQUALS, results); ASSERT_EQ(1, results.size()); results.clear(); - art_int32_search(&t, (uint32_t) (pow(10, i) + 7), 0, results); + art_int32_search(&t, (uint32_t) (pow(10, i) + 7), EQUALS, results); ASSERT_EQ(1, results.size()); } results.clear(); - art_int32_search(&t, 1000000 - 1, 0, results); + art_int32_search(&t, 1000000 - 1, EQUALS, results); ASSERT_EQ(1, results.size()); // >= results.clear(); - art_int32_search(&t, 1000000 - 5, 1, results); + art_int32_search(&t, 1000000 - 5, GREATER_THAN_EQUALS, results); ASSERT_EQ(5, results.size()); results.clear(); - art_int32_search(&t, 1000000 - 1, 1, results); + art_int32_search(&t, 1000000 - 5, GREATER_THAN, results); + ASSERT_EQ(4, results.size()); + + results.clear(); + art_int32_search(&t, 1000000 - 1, GREATER_THAN_EQUALS, results); ASSERT_EQ(1, results.size()); results.clear(); - art_int32_search(&t, 1000000, 1, results); + art_int32_search(&t, 1000000, GREATER_THAN_EQUALS, results); ASSERT_EQ(0, results.size()); results.clear(); - art_int32_search(&t, 5, 1, results); + art_int32_search(&t, 5, GREATER_THAN_EQUALS, results); ASSERT_EQ(1000000-5, results.size()); // <= results.clear(); - art_int32_search(&t, 1000000 - 5, -1, results); + art_int32_search(&t, 1000000 - 5, LESS_THAN_EQUALS, results); ASSERT_EQ(1000000-5+1, results.size()); results.clear(); - art_int32_search(&t, 1000000 - 1, -1, results); + art_int32_search(&t, 1000000 - 1, LESS_THAN_EQUALS, results); ASSERT_EQ(1000000, results.size()); results.clear(); - art_int32_search(&t, 1000000, -1, results); + art_int32_search(&t, 1000000 - 1, LESS_THAN, results); + ASSERT_EQ(1000000-1, results.size()); + + results.clear(); + art_int32_search(&t, 1000000, LESS_THAN_EQUALS, results); ASSERT_EQ(1000000, results.size()); results.clear(); - art_int32_search(&t, 5, -1, results); + art_int32_search(&t, 5, LESS_THAN_EQUALS, results); ASSERT_EQ(5+1, results.size()); + + results.clear(); + art_int32_search(&t, 5, LESS_THAN, results); + ASSERT_EQ(5, results.size()); } TEST(ArtTest, test_int_range_byte_boundary) { @@ -885,9 +930,13 @@ TEST(ArtTest, test_int_range_byte_boundary) { std::vector results; results.clear(); - art_int32_search(&t, 255, 1, results); + art_int32_search(&t, 255, GREATER_THAN_EQUALS, results); ASSERT_EQ(45, results.size()); + results.clear(); + art_int32_search(&t, 255, GREATER_THAN, results); + ASSERT_EQ(44, results.size()); + /*std::cout << std::endl; for(auto i = 0; i < 1; i++) { auto result = results[i]; @@ -897,4 +946,38 @@ TEST(ArtTest, test_int_range_byte_boundary) { std::cout << std::endl; }*/ +} + +TEST(ArtTest, test_encode_int64) { + unsigned char chars[8]; + + unsigned char chars_175[8] = {0, 0, 0, 0, 0, 0, 0, 175}; + encode_int64(175, chars); + for(uint32_t i = 0; i < 8; i++) { + ASSERT_EQ(chars_175[i], chars[i]); + } + + unsigned char chars_neg_175[8] = {255, 255, 255, 255, 255, 255, 255, 81}; + encode_int64(-175, chars); + for(uint32_t i = 0; i < 8; i++) { + ASSERT_EQ(chars_neg_175[i], chars[i]); + } + + unsigned char chars_100K[8] = {0, 0, 0, 0, 0, 1, 134, 160}; + encode_int64(100*1000, chars); + for(uint32_t i = 0; i < 8; i++) { + ASSERT_EQ(chars_100K[i], chars[i]); + } + + unsigned char chars_large_num[8] = {255, 255, 255, 255, 128, 0, 0, 199}; + encode_int64(std::numeric_limits::max()+200, chars); + for(uint32_t i = 0; i < 8; i++) { + ASSERT_EQ(chars_large_num[i], chars[i]); + } + + unsigned char chars_large_neg_num[8] = {0, 0, 0, 0, 127, 255, 255, 57}; + encode_int64(-1 * (std::numeric_limits::max()+200), chars); + for(uint32_t i = 0; i < 8; i++) { + ASSERT_EQ(chars_large_neg_num[i], chars[i]); + } } \ No newline at end of file