#include #if defined(__x86_64__) #include #elif defined(__aarch64__) #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include "art.h" #include "logger.h" /** * Macros to manipulate pointer tags */ #define IS_LEAF(x) (((uintptr_t)x & 1)) #define SET_LEAF(x) ((void*)((uintptr_t)x | 1)) #define LEAF_RAW(x) ((void*)((uintptr_t)x & ~1)) #define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c))) #ifdef IGNORE_PRINTF #define printf(fmt, ...) (0) #endif #define microseconds std::chrono::duration_cast #define USE_FREQUENCY_SCORE INT64_MIN enum recurse_progress { RECURSE, ABORT, ITERATE }; static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *n, int depth, const unsigned char *term, const int term_len, const int* irow, const int* jrow, const int min_cost, const int max_cost, const bool prefix, std::vector &results); void art_int_fuzzy_recurse(art_node *n, int depth, const unsigned char* int_str, int int_str_len, NUM_COMPARATOR comparator, std::vector &results); bool compare_art_leaf_frequency(const art_leaf *a, const art_leaf *b) { return posting_t::num_ids(a->values) > posting_t::num_ids(b->values); } bool compare_art_leaf_score(const art_leaf *a, const art_leaf *b) { return a->max_score > b->max_score; } bool compare_art_node_frequency(const art_node *a, const art_node *b) { uint32_t a_value = 0, b_value = 0; if(IS_LEAF(a)) { art_leaf* al = (art_leaf *) LEAF_RAW(a); a_value = posting_t::num_ids(al->values); } if(IS_LEAF(b)) { art_leaf* bl = (art_leaf *) LEAF_RAW(b); b_value = posting_t::num_ids(bl->values); } return a_value > b_value; } bool compare_art_node_score(const art_node* a, const art_node* b) { int64_t a_value = std::numeric_limits::min(), b_value = std::numeric_limits::min(); if(IS_LEAF(a)) { art_leaf* al = (art_leaf *) LEAF_RAW(a); a_value = al->max_score; } else { a_value = a->max_score; } if(IS_LEAF(b)) { art_leaf* bl = 
(art_leaf *) LEAF_RAW(b); b_value = bl->max_score; } else { b_value = b->max_score; } return a_value > b_value; } bool compare_art_node_frequency_pq(const art_node *a, const art_node *b) { return !compare_art_node_frequency(a, b); } bool compare_art_node_score_pq(const art_node* a, const art_node* b) { return !compare_art_node_score(a, b); } /** * Allocates a node of the given type, * initializes to zero and sets the type. */ static art_node* alloc_node(uint8_t type) { art_node* n; switch (type) { case NODE4: n = (art_node *) calloc(1, sizeof(art_node4)); break; case NODE16: n = (art_node *) calloc(1, sizeof(art_node16)); break; case NODE48: n = (art_node *) calloc(1, sizeof(art_node48)); break; case NODE256: n = (art_node *) calloc(1, sizeof(art_node256)); break; default: abort(); } n->type = type; n->max_score = 0; return n; } /** * Initializes an ART tree * @return 0 on success. */ int art_tree_init(art_tree *t) { t->root = NULL; t->size = 0; return 0; } // Recursively destroys the tree static void destroy_node(art_node *n) { // Break if null if (!n) return; // Special case leafs if (IS_LEAF(n)) { art_leaf *leaf = (art_leaf *) LEAF_RAW(n); posting_t::destroy_list(leaf->values); free(leaf); return; } // Handle each node type int i; union { art_node4 *p1; art_node16 *p2; art_node48 *p3; art_node256 *p4; } p; switch (n->type) { case NODE4: p.p1 = (art_node4*)n; for (i=0;inum_children;i++) { destroy_node(p.p1->children[i]); } break; case NODE16: p.p2 = (art_node16*)n; for (i=0;inum_children;i++) { destroy_node(p.p2->children[i]); } break; case NODE48: p.p3 = (art_node48*)n; for (i=0;i<48;i++) { destroy_node(p.p3->children[i]); } break; case NODE256: p.p4 = (art_node256*)n; for (i=0;i<256;i++) { if (p.p4->children[i]) destroy_node(p.p4->children[i]); } break; default: abort(); } // Free ourself on the way up free(n); } /** * Destroys an ART tree * @return 0 on success. 
*/ int art_tree_destroy(art_tree *t) { destroy_node(t->root); return 0; } /** * Returns the size of the ART tree. */ #ifndef BROKEN_GCC_C99_INLINE extern inline uint64_t art_size(art_tree *t); void compare_and_match_leaf(const unsigned char *int_str, int int_str_len, const NUM_COMPARATOR &comparator, std::vector &results, const art_leaf *l); #endif static art_node** find_child(art_node *n, unsigned char c) { int i, mask, bitfield; union { art_node4 *p1; art_node16 *p2; art_node48 *p3; art_node256 *p4; } p; switch (n->type) { case NODE4: p.p1 = (art_node4*)n; for (i=0;i < n->num_children; i++) { if (p.p1->keys[i] == c) return &p.p1->children[i]; } break; { __m128i cmp; case NODE16: p.p2 = (art_node16*)n; // Compare the key to all 16 stored keys cmp = _mm_cmpeq_epi8(_mm_set1_epi8(c), _mm_loadu_si128((__m128i*)p.p2->keys)); // Use a mask to ignore children that don't exist mask = (1 << n->num_children) - 1; bitfield = _mm_movemask_epi8(cmp) & mask; /* * If we have a match (any bit set) then we can * return the pointer match using ctz to get * the index. */ if (bitfield) return &p.p2->children[__builtin_ctz(bitfield)]; break; } case NODE48: p.p3 = (art_node48*)n; i = p.p3->keys[c]; if (i) return &p.p3->children[i-1]; break; case NODE256: p.p4 = (art_node256*)n; if (p.p4->children[c]) return &p.p4->children[c]; break; default: abort(); } return NULL; } // Simple inlined if static inline int min(int a, int b) { return (a < b) ? a : b; } /** * Returns the number of prefix characters shared between * the key and node. */ static int check_prefix(const art_node *n, const unsigned char *key, int key_len, int depth) { int max_cmp = min(min(n->partial_len, MAX_PREFIX_LEN), key_len - depth); int idx; for (idx=0; idx < max_cmp; idx++) { if (n->partial[idx] != key[depth+idx]) return idx; } return idx; } /** * Checks if a leaf matches * @return 0 on success. 
*/ static int leaf_matches(const art_leaf *n, const unsigned char *key, int key_len, int depth) { (void)depth; // Fail if the key lengths are different if (n->key_len != (uint32_t)key_len) return 1; // Compare the keys starting at the depth return memcmp(n->key, key, key_len); } /** * Searches for a value in the ART tree * @arg t The tree * @arg key The key * @arg key_len The length of the key * @return NULL if the item was not found, otherwise * the value pointer is returned. */ void* art_search(const art_tree *t, const unsigned char *key, int key_len) { art_node **child; art_node *n = t->root; int prefix_len, depth = 0; while (n) { // Might be a leaf if (IS_LEAF(n)) { n = (art_node *) LEAF_RAW(n); // Check if the expanded path matches if (!leaf_matches((art_leaf*)n, key, key_len, depth)) { return ((art_leaf*)n); } return NULL; } // Bail if the prefix does not match if (n->partial_len) { prefix_len = check_prefix(n, key, key_len, depth); if (prefix_len != min(MAX_PREFIX_LEN, n->partial_len)) { return NULL; } depth = depth + n->partial_len; if(depth >= key_len) { return NULL; } } assert(depth < key_len); // Recursively search child = find_child(n, key[depth]); n = (child) ? 
*child : NULL; depth++; } return NULL; } // Find the minimum leaf under a node static art_leaf* minimum(const art_node *n) { // Handle base cases if (!n) return NULL; if (IS_LEAF(n)) return (art_leaf *) LEAF_RAW(n); int idx; switch (n->type) { case NODE4: return minimum(((art_node4*)n)->children[0]); case NODE16: return minimum(((art_node16*)n)->children[0]); case NODE48: idx=0; while (!((art_node48*)n)->keys[idx]) idx++; idx = ((art_node48*)n)->keys[idx] - 1; return minimum(((art_node48*)n)->children[idx]); case NODE256: idx=0; while (!((art_node256*)n)->children[idx]) idx++; return minimum(((art_node256*)n)->children[idx]); default: abort(); } } // Find the maximum leaf under a node static art_leaf* maximum(const art_node *n) { // Handle base cases if (!n) return NULL; if (IS_LEAF(n)) return (art_leaf *) LEAF_RAW(n); int idx; switch (n->type) { case NODE4: return maximum(((art_node4*)n)->children[n->num_children-1]); case NODE16: return maximum(((art_node16*)n)->children[n->num_children-1]); case NODE48: idx=255; while (!((art_node48*)n)->keys[idx]) idx--; idx = ((art_node48*)n)->keys[idx] - 1; return maximum(((art_node48*)n)->children[idx]); case NODE256: idx=255; while (!((art_node256*)n)->children[idx]) idx--; return maximum(((art_node256*)n)->children[idx]); default: abort(); } } /** * Returns the minimum valued leaf */ art_leaf* art_minimum(art_tree *t) { return minimum((art_node*)t->root); } /** * Returns the maximum valued leaf */ art_leaf* art_maximum(art_tree *t) { return maximum((art_node*)t->root); } static void add_document_to_leaf(art_document *document, art_leaf *leaf) { leaf->max_score = MAX(leaf->max_score, document->score); posting_t::upsert(leaf->values, document->id, document->offsets); if(document->score == USE_FREQUENCY_SCORE) { leaf->max_score = posting_t::num_ids(leaf->values); } } static art_leaf* make_leaf(const unsigned char *key, uint32_t key_len, art_document *document) { art_leaf *l = (art_leaf *) malloc(sizeof(art_leaf) + key_len); 
l->key_len = key_len; l->max_score = document->score; uint32_t ids[1] = {document->id}; uint32_t offset_index[1] = {0}; if((2 + document->offsets.size()) <= posting_t::COMPACT_LIST_THRESHOLD_LENGTH) { compact_posting_list_t* list = compact_posting_list_t::create(1, ids, offset_index, document->offsets.size(), &document->offsets[0]); l->values = SET_COMPACT_POSTING(list); } else { posting_list_t* pl = new posting_list_t(posting_t::MAX_BLOCK_ELEMENTS); pl->upsert(document->id, document->offsets); l->values = pl; } memcpy(l->key, key, key_len); add_document_to_leaf(document, l); return l; } static uint32_t longest_common_prefix(art_leaf *l1, art_leaf *l2, int depth) { int max_cmp = min(l1->key_len, l2->key_len) - depth; int idx; for (idx=0; idx < max_cmp; idx++) { if (l1->key[depth+idx] != l2->key[depth+idx]) return idx; } return idx; } static void copy_header(art_node *dest, art_node *src) { dest->num_children = src->num_children; dest->partial_len = src->partial_len; dest->max_score = src->max_score; memcpy(dest->partial, src->partial, min(MAX_PREFIX_LEN, src->partial_len)); } static void add_child256(art_node256 *n, art_node **ref, unsigned char c, void *child) { (void)ref; n->n.num_children++; n->children[c] = (art_node *) child; n->n.max_score = MAX(n->n.max_score, ((art_leaf *) LEAF_RAW(child))->max_score); } static void add_child48(art_node48 *n, art_node **ref, unsigned char c, void *child) { if (n->n.num_children < 48) { int pos = 0; while (n->children[pos]) pos++; n->children[pos] = (art_node *) child; n->keys[c] = pos + 1; n->n.num_children++; n->n.max_score = MAX(n->n.max_score, ((art_leaf *) LEAF_RAW(child))->max_score); } else { art_node256 *new_n = (art_node256*)alloc_node(NODE256); for (int i=0;i<256;i++) { if (n->keys[i]) { new_n->children[i] = n->children[n->keys[i] - 1]; } } copy_header((art_node*)new_n, (art_node*)n); *ref = (art_node*)new_n; free(n); add_child256(new_n, ref, c, child); } } static void add_child16(art_node16 *n, art_node **ref, 
unsigned char c, void *child) { if (n->n.num_children < 16) { __m128i cmp; // Compare the key to all 16 stored keys cmp = _mm_cmplt_epi8(_mm_set1_epi8(c), _mm_loadu_si128((__m128i*)n->keys)); // Use a mask to ignore children that don't exist unsigned mask = (1 << n->n.num_children) - 1; unsigned bitfield = _mm_movemask_epi8(cmp) & mask; // Check if less than any unsigned idx; if (bitfield) { idx = __builtin_ctz(bitfield); memmove(n->keys+idx+1,n->keys+idx,n->n.num_children-idx); memmove(n->children+idx+1,n->children+idx, (n->n.num_children-idx)*sizeof(void*)); } else idx = n->n.num_children; // Set the child n->keys[idx] = c; n->children[idx] = (art_node *) child; n->n.num_children++; n->n.max_score = MAX(n->n.max_score, ((art_leaf *) LEAF_RAW(child))->max_score); } else { art_node48 *new_n = (art_node48*)alloc_node(NODE48); // Copy the child pointers and populate the key map memcpy(new_n->children, n->children, sizeof(void*)*n->n.num_children); for (int i=0;in.num_children;i++) { new_n->keys[n->keys[i]] = i + 1; } copy_header((art_node*)new_n, (art_node*)n); *ref = (art_node*)new_n; free(n); add_child48(new_n, ref, c, child); } } static void add_child4(art_node4 *n, art_node **ref, unsigned char c, void *child) { if (n->n.num_children < 4) { int idx; for (idx=0; idx < n->n.num_children; idx++) { if (c < n->keys[idx]) break; } // Shift to make room memmove(n->keys+idx+1, n->keys+idx, n->n.num_children - idx); memmove(n->children+idx+1, n->children+idx, (n->n.num_children - idx)*sizeof(void*)); n->keys[idx] = c; n->children[idx] = (art_node *) child; n->n.num_children++; n->n.max_score = MAX(n->n.max_score, ((art_leaf *) LEAF_RAW(child))->max_score); } else { art_node16 *new_n = (art_node16*)alloc_node(NODE16); // Copy the child pointers and the key map memcpy(new_n->children, n->children, sizeof(void*)*n->n.num_children); memcpy(new_n->keys, n->keys, sizeof(unsigned char)*n->n.num_children); copy_header((art_node*)new_n, (art_node*)n); *ref = (art_node*)new_n; 
free(n); add_child16(new_n, ref, c, child); } } static void add_child(art_node *n, art_node **ref, unsigned char c, void *child) { switch (n->type) { case NODE4: return add_child4((art_node4*)n, ref, c, child); case NODE16: return add_child16((art_node16*)n, ref, c, child); case NODE48: return add_child48((art_node48*)n, ref, c, child); case NODE256: return add_child256((art_node256*)n, ref, c, child); default: abort(); } } /** * Calculates the index at which the prefixes mismatch */ static int prefix_mismatch(const art_node *n, const unsigned char *key, int key_len, int depth) { int max_cmp = min(min(MAX_PREFIX_LEN, n->partial_len), key_len - depth); int idx; for (idx=0; idx < max_cmp; idx++) { if (n->partial[idx] != key[depth+idx]) return idx; } // If the prefix is short we can avoid finding a leaf if (n->partial_len > MAX_PREFIX_LEN) { // Prefix is longer than what we've checked, find a leaf art_leaf *l = minimum(n); max_cmp = min(l->key_len, key_len)- depth; for (; idx < max_cmp; idx++) { if (l->key[idx+depth] != key[depth+idx]) return idx; } } return idx; } static void* recursive_insert(art_node* n, art_node** ref, const unsigned char* key, uint32_t key_len, const int64_t docs_max_score, std::vector& documents, int depth, std::list& path, int* old) { // If we are at a NULL node, inject a leaf if (!n) { art_leaf* new_leaf = make_leaf(key, key_len, &documents[0]); for(size_t i = 1; i < documents.size(); i++) { add_document_to_leaf(&documents[i], new_leaf); } *ref = (art_node*)SET_LEAF(new_leaf); return NULL; } // If we are at a leaf, we need to replace it with a node if (IS_LEAF(n)) { art_leaf *l = (art_leaf *) LEAF_RAW(n); // Check if we are updating an existing value if (!leaf_matches(l, key, key_len, depth)) { *old = 1; for(size_t i = 0; i < documents.size(); i++) { add_document_to_leaf(&documents[i], l); } return l->values; } // New value, we must split the leaf into a node4 art_node4 *new_n = (art_node4*)alloc_node(NODE4); // Create a new leaf art_leaf *l2 
= make_leaf(key, key_len, &documents[0]); uint32_t longest_prefix = longest_common_prefix(l, l2, depth); new_n->n.partial_len = longest_prefix; memcpy(new_n->n.partial, key+depth, min(MAX_PREFIX_LEN, longest_prefix)); for(size_t i = 1; i < documents.size(); i++) { add_document_to_leaf(&documents[i], l2); } // Add the leafs to the new node4 *ref = (art_node*)new_n; add_child4(new_n, ref, l->key[depth+longest_prefix], SET_LEAF(l)); add_child4(new_n, ref, l2->key[depth+longest_prefix], SET_LEAF(l2)); return NULL; } if(docs_max_score != USE_FREQUENCY_SCORE) { n->max_score = MAX(n->max_score, docs_max_score); } // Check if given node has a prefix if (n->partial_len) { // Determine if the prefixes differ, since we need to split int prefix_diff = prefix_mismatch(n, key, key_len, depth); if ((uint32_t)prefix_diff >= n->partial_len) { depth += n->partial_len; goto RECURSE_SEARCH; } // Create a new node art_node4 *new_n = (art_node4*)alloc_node(NODE4); *ref = (art_node*)new_n; new_n->n.partial_len = prefix_diff; memcpy(new_n->n.partial, n->partial, min(MAX_PREFIX_LEN, prefix_diff)); // Adjust the prefix of the old node if (n->partial_len <= MAX_PREFIX_LEN) { add_child4(new_n, ref, n->partial[prefix_diff], n); n->partial_len -= (prefix_diff+1); memmove(n->partial, n->partial+prefix_diff+1, min(MAX_PREFIX_LEN, n->partial_len)); } else { n->partial_len -= (prefix_diff+1); art_leaf *l = minimum(n); add_child4(new_n, ref, l->key[depth+prefix_diff], n); memcpy(n->partial, l->key+depth+prefix_diff+1, min(MAX_PREFIX_LEN, n->partial_len)); } // Insert the new leaf art_leaf *l = make_leaf(key, key_len, &documents[0]); for(size_t i = 1; i < documents.size(); i++) { add_document_to_leaf(&documents[i], l); } add_child4(new_n, ref, key[depth+prefix_diff], SET_LEAF(l)); path.push_back(*ref); return NULL; } RECURSE_SEARCH:; // Find a child to recurse to art_node **child = find_child(n, key[depth]); if (child) { return recursive_insert(*child, child, key, key_len, docs_max_score, documents, 
depth + 1, path, old); } // No child, node goes within us art_leaf *l = make_leaf(key, key_len, &documents[0]); for(size_t i = 1; i < documents.size(); i++) { add_document_to_leaf(&documents[i], l); } add_child(n, ref, key[depth], SET_LEAF(l)); path.push_back(*ref); return NULL; } /** * Inserts a new value into the ART tree * @arg t The tree * @arg key The key * @arg key_len The length of the key * @arg value Opaque value. * @return NULL if the item was newly inserted, otherwise * the old value pointer is returned. */ void* art_insert(art_tree *t, const unsigned char *key, int key_len, art_document* document) { std::vector documents = {*document}; return art_inserts(t, key, key_len, document->score, documents); } void* art_inserts(art_tree *t, const unsigned char *key, int key_len, const int64_t docs_max_score, std::vector& documents) { int old_val = 0; std::list path; bool frequency_based_ordering = (docs_max_score == USE_FREQUENCY_SCORE); void *old = recursive_insert(t->root, &t->root, key, key_len, docs_max_score, documents, 0, path, &old_val); if (!old_val) t->size++; if(frequency_based_ordering) { for(art_node* n: path) { n->max_score = MAX(n->max_score, docs_max_score); } } return old; } static void remove_child256(art_node256 *n, art_node **ref, unsigned char c) { n->children[c] = NULL; n->n.num_children--; // Resize to a node48 on underflow, not immediately to prevent // trashing if we sit on the 48/49 boundary if (n->n.num_children == 37) { art_node48 *new_n = (art_node48*)alloc_node(NODE48); *ref = (art_node*)new_n; copy_header((art_node*)new_n, (art_node*)n); int pos = 0; for (int i=0;i<256;i++) { if (n->children[i]) { new_n->children[pos] = n->children[i]; new_n->keys[i] = pos + 1; pos++; } } free(n); } } static void remove_child48(art_node48 *n, art_node **ref, unsigned char c) { int pos = n->keys[c]; n->keys[c] = 0; n->children[pos-1] = NULL; n->n.num_children--; if (n->n.num_children == 12) { art_node16 *new_n = (art_node16*)alloc_node(NODE16); *ref 
= (art_node*)new_n; copy_header((art_node*)new_n, (art_node*)n); int child = 0; for (int i=0;i<256;i++) { pos = n->keys[i]; if (pos) { new_n->keys[child] = i; new_n->children[child] = n->children[pos - 1]; child++; } } free(n); } } static void remove_child16(art_node16 *n, art_node **ref, art_node **l) { int pos = l - n->children; memmove(n->keys+pos, n->keys+pos+1, n->n.num_children - 1 - pos); memmove(n->children+pos, n->children+pos+1, (n->n.num_children - 1 - pos)*sizeof(void*)); n->n.num_children--; if (n->n.num_children == 3) { art_node4 *new_n = (art_node4*)alloc_node(NODE4); *ref = (art_node*)new_n; copy_header((art_node*)new_n, (art_node*)n); memcpy(new_n->keys, n->keys, 4); memcpy(new_n->children, n->children, 4*sizeof(void*)); free(n); } } static void remove_child4(art_node4 *n, art_node **ref, art_node **l) { int pos = l - n->children; memmove(n->keys+pos, n->keys+pos+1, n->n.num_children - 1 - pos); memmove(n->children+pos, n->children+pos+1, (n->n.num_children - 1 - pos)*sizeof(void*)); n->n.num_children--; // Remove nodes with only a single child if (n->n.num_children == 1) { art_node *child = n->children[0]; if (!IS_LEAF(child)) { // Concatenate the prefixes int prefix = n->n.partial_len; if (prefix < MAX_PREFIX_LEN) { n->n.partial[prefix] = n->keys[0]; prefix++; } if (prefix < MAX_PREFIX_LEN) { int sub_prefix = min(child->partial_len, MAX_PREFIX_LEN - prefix); memcpy(n->n.partial+prefix, child->partial, sub_prefix); prefix += sub_prefix; } // Store the prefix in the child memcpy(child->partial, n->n.partial, min(prefix, MAX_PREFIX_LEN)); child->partial_len += n->n.partial_len + 1; } *ref = child; free(n); } } static void remove_child(art_node *n, art_node **ref, unsigned char c, art_node **l) { switch (n->type) { case NODE4: return remove_child4((art_node4*)n, ref, l); case NODE16: return remove_child16((art_node16*)n, ref, l); case NODE48: return remove_child48((art_node48*)n, ref, c); case NODE256: return remove_child256((art_node256*)n, ref, c); 
default: abort(); } } static art_leaf* recursive_delete(art_node *n, art_node **ref, const unsigned char *key, int key_len, int depth) { // Search terminated if (!n) return NULL; // Handle hitting a leaf node if (IS_LEAF(n)) { art_leaf *l = (art_leaf *) LEAF_RAW(n); if (!leaf_matches(l, key, key_len, depth)) { *ref = NULL; return l; } return NULL; } // Bail if the prefix does not match if (n->partial_len) { int prefix_len = check_prefix(n, key, key_len, depth); if (prefix_len != min(MAX_PREFIX_LEN, n->partial_len)) { return NULL; } depth = depth + n->partial_len; if(depth >= key_len) { return NULL; } } assert(depth < key_len); // Find child node art_node **child = find_child(n, key[depth]); if (!child) return NULL; // If the child is leaf, delete from this node if (IS_LEAF(*child)) { art_leaf *l = (art_leaf *) LEAF_RAW(*child); if (!leaf_matches(l, key, key_len, depth)) { remove_child(n, ref, key[depth], child); return l; } return NULL; // Recurse } else { return recursive_delete(*child, child, key, key_len, depth+1); } } /** * Deletes a value from the ART tree * @arg t The tree * @arg key The key * @arg key_len The length of the key * @return NULL if the item was not found, otherwise * the value pointer is returned. 
*/ void* art_delete(art_tree *t, const unsigned char *key, int key_len) { art_leaf *l = recursive_delete(t->root, &t->root, key, key_len, 0); if (l) { t->size--; void *old = l->values; free(l); return old; } return NULL; } /*static uint32_t get_score(art_node* child) { if (IS_LEAF(child)) { art_leaf *l = (art_leaf *) LEAF_RAW(child); return l->values->ids.getLength(); } return child->max_token_count; }*/ int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_results, const uint32_t* filter_ids, size_t filter_ids_length, const std::set& exclude_leaves, const art_leaf* exact_leaf, std::vector& results) { printf("INSIDE art_topk_iter: root->type: %d\n", root->type); std::priority_queue, decltype(&compare_art_node_score_pq)> q(compare_art_node_score_pq); if(token_order == FREQUENCY) { q = std::priority_queue, decltype(&compare_art_node_frequency_pq)>(compare_art_node_frequency_pq); } q.push(root); while(!q.empty() && results.size() < max_results*4) { art_node *n = (art_node *) q.top(); q.pop(); /*if (IS_LEAF(n)) { art_leaf *l = (art_leaf *) LEAF_RAW(n); LOG(INFO) << "Top node (leaf) score: " << l->max_score; } else { LOG(INFO) << "Top node score: " << n->max_score; }*/ if (!n) continue; if (IS_LEAF(n)) { art_leaf *l = (art_leaf *) LEAF_RAW(n); //LOG(INFO) << "END LEAF SCORE: " << l->max_score; if(filter_ids_length == 0) { std::string tok(reinterpret_cast(l->key), l->key_len - 1); if(exclude_leaves.count(tok) != 0 || l == exact_leaf) { continue; } results.push_back(l); } else { // we will push leaf only if filter matches with leaf IDs bool found_atleast_one = posting_t::contains_atleast_one(l->values, filter_ids, filter_ids_length); if(found_atleast_one) { std::string tok(reinterpret_cast(l->key), l->key_len - 1); if(exclude_leaves.count(tok) != 0 || l == exact_leaf) { continue; } results.push_back(l); } } continue; } int idx; switch (n->type) { case NODE4: //LOG(INFO) << "NODE4, SCORE: " << n->max_score; for (int i=0; i < n->num_children; i++) { 
art_node* child = ((art_node4*)n)->children[i]; q.push(child); } break; case NODE16: //LOG(INFO) << "NODE16, SCORE: " << n->max_score; for (int i=0; i < n->num_children; i++) { q.push(((art_node16*)n)->children[i]); } break; case NODE48: //LOG(INFO) << "NODE48, SCORE: " << n->max_score; for (int i=0; i < 256; i++) { idx = ((art_node48*)n)->keys[i]; if (!idx) continue; art_node *child = ((art_node48*)n)->children[idx - 1]; q.push(child); } break; case NODE256: //LOG(INFO) << "NODE256, SCORE: " << n->max_score; for (int i=0; i < 256; i++) { if (!((art_node256*)n)->children[i]) continue; q.push(((art_node256*)n)->children[i]); } break; default: printf("ABORTING BECAUSE OF UNKNOWN NODE TYPE: %d\n", n->type); abort(); } } /*LOG(INFO) << "leaf results.size: " << results.size() << ", filter_ids_length: " << filter_ids_length << ", num_large_lists: " << num_large_lists;*/ printf("OUTSIDE art_topk_iter: results size: %d\n", results.size()); return 0; } // Recursively iterates over the tree static int recursive_iter(art_node *n, art_callback cb, void *data) { // Handle base cases if (!n) return 0; if (IS_LEAF(n)) { art_leaf *l = (art_leaf *) LEAF_RAW(n); //printf("REC LEAF len: %d, key: %s\n", l->key_len, l->key); return cb(data, (const unsigned char*)l->key, l->key_len, l->values); } //printf("INTERNAL LEAF children: %d, partial_len: %d, partial: %s\n", n->num_children, n->partial_len, n->partial); int idx, res; switch (n->type) { case NODE4: for (int i=0; i < n->num_children; i++) { //printf("INTERNAL LEAF key[i]: %c\n", ((art_node4*)n)->keys[i]); res = recursive_iter(((art_node4*)n)->children[i], cb, data); if (res) return res; } break; case NODE16: for (int i=0; i < n->num_children; i++) { res = recursive_iter(((art_node16*)n)->children[i], cb, data); if (res) return res; } break; case NODE48: for (int i=0; i < 256; i++) { idx = ((art_node48*)n)->keys[i]; if (!idx) continue; res = recursive_iter(((art_node48*)n)->children[idx-1], cb, data); if (res) return res; } break; 
case NODE256: for (int i=0; i < 256; i++) { if (!((art_node256*)n)->children[i]) continue; res = recursive_iter(((art_node256*)n)->children[i], cb, data); if (res) return res; } break; default: abort(); } return 0; } /** * Iterates through the entries pairs in the map, * invoking a callback for each. The call back gets a * key, value for each and returns an integer stop value. * If the callback returns non-zero, then the iteration stops. * @arg t The tree to iterate over * @arg cb The callback function to invoke * @arg data Opaque handle passed to the callback * @return 0 on success, or the return of the callback. */ int art_iter(art_tree *t, art_callback cb, void *data) { return recursive_iter(t->root, cb, data); } /** * Checks if a leaf prefix matches * @return 0 on success. */ static int leaf_prefix_matches(const art_leaf *n, const unsigned char *prefix, int prefix_len) { // Fail if the key length is too short if (n->key_len < (uint32_t)prefix_len) return 1; // Compare the keys return memcmp(n->key, prefix, prefix_len); } /** * Iterates through the entries pairs in the map, * invoking a callback for each that matches a given prefix. * The call back gets a key, value for each and returns an integer stop value. * If the callback returns non-zero, then the iteration stops. * @arg t The tree to iterate over * @arg prefix The prefix of keys to read * @arg prefix_len The length of the prefix * @arg cb The callback function to invoke * @arg data Opaque handle passed to the callback * @return 0 on success, or the return of the callback. 
*/ int art_iter_prefix(art_tree *t, const unsigned char *key, int key_len, art_callback cb, void *data) { art_node **child; art_node *n = t->root; int prefix_len, depth = 0; while (n) { //printf("partial_len: %d\n", n->num_children); // Might be a leaf if (IS_LEAF(n)) { n = (art_node *) LEAF_RAW(n); printf("RAW LEAF len: %d, children: %d\n", n->partial_len, n->num_children); // Check if the expanded path matches if (!leaf_prefix_matches((art_leaf*)n, key, key_len)) { art_leaf *l = (art_leaf*)n; return cb(data, (const unsigned char*)l->key, l->key_len, l->values); } return 0; } printf("IS_INTERNAL\n"); printf("Prefix len: %d, children: %d, depth: %d, partial: %s\n", n->partial_len, n->num_children, depth, n->partial); // If the depth matches the prefix, we need to handle this node if (depth == key_len) { art_leaf *l = minimum(n); printf("DEPTH LEAF len: %d, key: %s\n", l->key_len, l->key); if (!leaf_prefix_matches(l, key, key_len)) return recursive_iter(n, cb, data); return 0; } // Bail if the prefix does not match if (n->partial_len) { prefix_len = prefix_mismatch(n, key, key_len, depth); // Guard if the mis-match is longer than the MAX_PREFIX_LEN if (prefix_len > n->partial_len) { prefix_len = n->partial_len; } // If there is no match, search is terminated if (!prefix_len) { return 0; } else if (depth + prefix_len == key_len) { // If we've matched the prefix, iterate on this node return recursive_iter(n, cb, data); } else if(depth + n->partial_len >= key_len) { return 0; } // if there is a full match, go deeper depth = depth + n->partial_len; } assert(depth < key_len); // Recursively search child = find_child(n, key[depth]); n = (child) ? 
*child : NULL; depth++; } return 0; } /* Debug helper: prints row[0] through row[row_len] inclusive, space separated, then a newline. The loop bound is <=, so row must hold row_len + 1 ints. Prints nothing when IGNORE_PRINTF turns printf into a no-op macro. */ void print_row(const int* row, const int row_len) { for(int i=0; i<=row_len; i++) { printf("%d ", row[i]); } printf("\n"); } static inline void copyIntArray2(const int *src, int *dest, const int len) { for(int t=0; t b, column => j, c => a[i], p => a[i-1], irow => d[i-1]): // https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance for(int column=1; column<=term_len; column++) { int cost = (c == term[column-1]) ? 0 : 1; // column-1 used because of zero-based char array int delete_cost = jrow[column] + 1; int insert_cost = krow[column - 1] + 1; int substitution_cost = jrow[column - 1] + cost; krow[column] = min(min(insert_cost, delete_cost), substitution_cost); if(depth > 1 && column > 1 && c == term[column-1-1] && p == term[column-1]) { krow[column] = std::min(krow[column], irow[column-2] + 1); } } } /* Calls art_fuzzy_recurse on every child of inner node n, passing the child key byte as c and forwarding p, depth, term, both rows, the cost bounds and prefix unchanged. NODE4 and NODE16 walk keys[] from index num_children-1 down to 0; NODE48 and NODE256 walk byte values 255 down to 0 and skip empty slots (for NODE48, keys[b] holds child index + 1 and 0 means no child). Any other node type aborts. NOTE(review): child_char is a plain char here but the callee takes unsigned char, so it is converted at the call. */ static inline void art_fuzzy_children(unsigned char p, const art_node *n, int depth, const unsigned char *term, const int term_len, const int* irow, const int* jrow, const int min_cost, const int max_cost, const bool prefix, std::vector &results) { char child_char; art_node* child; switch (n->type) { case NODE4: printf("\nNODE4\n"); for (int i=n->num_children-1; i >= 0; i--) { child_char = ((art_node4*)n)->keys[i]; printf("4!child_char: %c, %d, depth: %d\n", child_char, child_char, depth); child = ((art_node4*)n)->children[i]; art_fuzzy_recurse(p, child_char, child, depth, term, term_len, irow, jrow, min_cost, max_cost, prefix, results); } break; case NODE16: printf("\nNODE16\n"); for (int i=n->num_children-1; i >= 0; i--) { child_char = ((art_node16*)n)->keys[i]; printf("16!child_char: %c, depth: %d\n", child_char, depth); child = ((art_node16*)n)->children[i]; art_fuzzy_recurse(p, child_char, child, depth, term, term_len, irow, jrow, min_cost, max_cost, prefix, results); } break; case NODE48: printf("\nNODE48\n"); for (int i=255; i >= 0; i--) { int ix = ((art_node48*)n)->keys[i]; if
(!ix) continue; child = ((art_node48*)n)->children[ix - 1]; child_char = (char)i; printf("48!child_char: %c, depth: %d, ix: %d\n", child_char, depth, ix); art_fuzzy_recurse(p, child_char, child, depth, term, term_len, irow, jrow, min_cost, max_cost, prefix, results); } break; case NODE256: printf("\nNODE256\n"); for (int i=255; i >= 0; i--) { if (!((art_node256*)n)->children[i]) continue; child_char = (char) i; printf("256!child_char: %c, depth: %d\n", child_char, depth); child = ((art_node256*)n)->children[i]; art_fuzzy_recurse(p, child_char, child, depth, term, term_len, irow, jrow, min_cost, max_cost, prefix, results); } break; default: abort(); } } /* Rotates three ints one step: (i, j, k) becomes (j, k, i). art_fuzzy_recurse uses this to cycle the indices of its three row buffers after each levenshtein_dist call. */ static inline void rotate(int &i, int &j, int &k) { int old_i = i; i = j; j = k; k = old_i; } /* Decides how the fuzzy walk proceeds after the key byte at key_index; cost_row is the current distance row and is indexed up to term_len. If last_key_char is set: returns 1 when cost_row[term_len] is within [min_cost, max_cost], or when key_index > 5, term_len exceeds key_index by at most max_cost and cost_row[key_index] is within [min_cost-1, max_cost-1]; otherwise -1. If not: with prefix set and key_index + 1 >= term_len, returns 1 when cost_row[term_len] is within bounds. In every remaining case the result is -1 once the examined cost exceeds max_cost + 1 (or exceeds 0 when max_cost is 0), else 0. NOTE(review): in the last_key_char branch cost_row[key_len] is read before term_len > key_len is tested; if key_index can exceed term_len that read is past the end of the row. Confirm the callers, or move the read behind the guard. */ // -1: return without adding, 0 : continue iteration, 1: return after adding static inline int fuzzy_search_state(const bool prefix, int key_index, bool last_key_char, int term_len, const int* cost_row, int min_cost, int max_cost) { // a) iter_len < term_len: "pltninum" (term) on "pst" (key) // b) term_len < iter_len: "pst" (term) on "pltninum" (key) int cost = 0; // a) because key's null character will appear first if(last_key_char) { int key_len = key_index; cost = cost_row[term_len]; if(cost >= min_cost && cost <= max_cost) { return 1; } cost = cost_row[key_len]; // used to match q=strawberries on key=strawberry, but limit to larger keys to prevent eager matches if(key_len > 5 && term_len > key_len && (term_len - key_len) <= max_cost && cost >= min_cost-1 && cost <= max_cost-1) { return 1; } return -1; } int key_len = key_index + 1; // b) we might iterate past term_len to catch trailing typos if(key_len >= term_len && prefix) { cost = cost_row[term_len]; if(cost >= min_cost && cost <= max_cost) { return 1; } } else { // `key_len` can't exceed `term_len` since length of `cost_row` is `term_len + 1` cost = cost_row[std::min(key_len, term_len)]; } int bounded_cost = (max_cost == 0) ?
max_cost : (max_cost + 1); return (cost > bounded_cost) ? -1 : 0; } /* Fuzzy walk of the subtree at n. irow and jrow (term_len + 1 ints each) are copied into local rows, and a third local row receives each new levenshtein_dist result; rotate() then relabels the three. depth == -1 marks the root call: no byte is consumed and depth becomes 0. Otherwise byte c is handled first (the distance update is skipped for a NUL byte when prefix is set) and fuzzy_search_state decides: 1 pushes n to results and returns, -1 returns, 0 continues with depth + 1. For a leaf, the remaining key bytes are scanned up to min(key_len, term_len + max_cost). For an inner node, up to MAX_PREFIX_LEN stored partial bytes are consumed; if the partial was longer than that, bytes of term itself are fed in until depth reaches term_len or the missing count is covered; then art_fuzzy_children visits the children. Entries pushed to results are nodes as passed in (inner nodes or tagged leaf pointers). NOTE(review): row0, row1 and row2 are variable-length arrays, which standard C++ does not provide. NOTE(review): the debug printf in the leaf loop names temp_cost, which is not declared in this function, so it only builds while the printf macro from IGNORE_PRINTF discards its arguments. */ static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *n, int depth, const unsigned char *term, const int term_len, const int* irow, const int* jrow, const int min_cost, const int max_cost, const bool prefix, std::vector &results) { if (!n) return ; const int columns = term_len+1; int i=0, j=1, k=2; int row0[columns]; int row1[columns]; int row2[columns]; int* rows[3] = {row0, row1, row2}; copyIntArray2(irow, rows[i], columns); copyIntArray2(jrow, rows[j], columns); if(depth == -1) { // root node depth = 0; } else { // check indexed char first bool last_key_char = (c == '\0'); if(!prefix || !last_key_char) { levenshtein_dist(depth, p, c, term, term_len, rows[i], rows[j], rows[k]); rotate(i, j, k); p = c; } int action = fuzzy_search_state(prefix, depth, last_key_char, term_len, rows[j], min_cost, max_cost); if(1 == action) { results.push_back(n); return; } if(action == -1) { return; } depth++; } // check if node is a leaf if(IS_LEAF(n)) { art_leaf *l = (art_leaf *) LEAF_RAW(n); //std::string leaf_str((const char*)l->key, l->key_len-1); //LOG(INFO) << "leaf key: " << leaf_str; /*if(leaf_str == "illustrations") { LOG(INFO) << "here"; }*/ // look past term_len to deal with trailing typo, e.g. searching "pltinum" on "platinum" @ max_cost = 1 const int iter_len = std::min(int(l->key_len), term_len + max_cost); if(depth >= iter_len) { // when a preceding partial node completely contains the whole leaf (e.g.
"[raspberr]y" on "raspberries") int action = fuzzy_search_state(prefix, depth, true, term_len, rows[j], min_cost, max_cost); if(action == 1) { results.push_back(n); } return; } // we will iterate through remaining leaf characters while(depth < iter_len) { c = l->key[depth]; bool last_key_char = (c == '\0'); if(!prefix || !last_key_char) { levenshtein_dist(depth, p, c, term, term_len, rows[i], rows[j], rows[k]); printf("leaf char: %c\n", l->key[depth]); printf("cost: %d, depth: %d, term_len: %d\n", temp_cost, depth, term_len); rotate(i, j, k); p = c; } int action = fuzzy_search_state(prefix, depth, last_key_char, term_len, rows[j], min_cost, max_cost); if(action == 1) { results.push_back(n); return; } if(action == -1) { return; } depth++; } return ; } // now check compressed prefix int partial_len = min(MAX_PREFIX_LEN, n->partial_len); //std::string partial_str(reinterpret_cast(n->partial), n->partial_len); for (int idx = 0; idx < partial_len; idx++) { c = n->partial[idx]; levenshtein_dist(depth, p, c, term, term_len, rows[i], rows[j], rows[k]); rotate(i, j, k); p = c; int action = fuzzy_search_state(prefix, depth, false, term_len, rows[j], min_cost, max_cost); if(action == 1) { results.push_back(n); return; } if(action == -1) { return; } depth++; } // Some intermediate path may have been left out if partial_len is truncated: progress the levenshtein matrix while(partial_len < n->partial_len && depth < term_len) { c = term[depth]; levenshtein_dist(depth, p, c, term, term_len, rows[i], rows[j], rows[k]); rotate(i, j, k); p = c; int action = fuzzy_search_state(prefix, depth, false, term_len, rows[j], min_cost, max_cost); if(action == 1) { results.push_back(n); return; } if(action == -1) { return; } depth++; partial_len++; } art_fuzzy_children(c, n, depth, term, term_len, rows[i], rows[j], min_cost, max_cost, prefix, results); } /** * Returns leaves that match a given string within a fuzzy distance of max_cost.
*/ /* Implementation notes: irow and jrow are both seeded with 0..term_len. A leaf root is walked from depth 0 with its first key byte; an inner root is walked with depth -1; a null root returns 0 with results untouched. The exact term is then looked up through art_search with key length term_len + 1 when prefix is set and term_len otherwise. Every collected node is expanded with art_topk_iter, results are sorted with compare_art_leaf_frequency when token_order is FREQUENCY and compare_art_leaf_score otherwise, the exact leaf (if found and min_cost is 0) is inserted at the front, and results is cut to max_words entries. Always returns 0. NOTE(review): results.size() > max_words compares size_t with int, so a negative max_words never truncates. NOTE(review): irow and jrow are variable-length arrays, which standard C++ does not provide. */ int art_fuzzy_search(art_tree *t, const unsigned char *term, const int term_len, const int min_cost, const int max_cost, const int max_words, const token_ordering token_order, const bool prefix, const uint32_t *filter_ids, size_t filter_ids_length, std::vector &results, const std::set& exclude_leaves) { std::vector nodes; int irow[term_len + 1]; int jrow[term_len + 1]; for (int i = 0; i <= term_len; i++){ irow[i] = jrow[i] = i; } //auto begin = std::chrono::high_resolution_clock::now(); if(IS_LEAF(t->root)) { art_leaf *l = (art_leaf *) LEAF_RAW(t->root); art_fuzzy_recurse(0, l->key[0], t->root, 0, term, term_len, irow, jrow, min_cost, max_cost, prefix, nodes); } else { if(t->root == nullptr) { return 0; } // send depth as -1 to indicate that this is a root node art_fuzzy_recurse(0, 0, t->root, -1, term, term_len, irow, jrow, min_cost, max_cost, prefix, nodes); } //long long int time_micro = microseconds(std::chrono::high_resolution_clock::now() - begin).count(); //!LOG(INFO) << "Time taken for fuzz: " << time_micro << "us, size of nodes: " << nodes.size(); //auto begin = std::chrono::high_resolution_clock::now(); size_t key_len = prefix ?
term_len + 1 : term_len; art_leaf* exact_leaf = (art_leaf *) art_search(t, term, key_len); //LOG(INFO) << "exact_leaf: " << exact_leaf << ", term: " << term << ", term_len: " << term_len; for(auto node: nodes) { art_topk_iter(node, token_order, max_words, filter_ids, filter_ids_length, exclude_leaves, exact_leaf, results); } if(token_order == FREQUENCY) { std::sort(results.begin(), results.end(), compare_art_leaf_frequency); } else { std::sort(results.begin(), results.end(), compare_art_leaf_score); } if(exact_leaf && min_cost == 0) { results.insert(results.begin(), exact_leaf); } if(results.size() > max_words) { results.resize(max_words); } /*auto time_micro = microseconds(std::chrono::high_resolution_clock::now() - begin).count(); if(time_micro > 1000) { LOG(INFO) << "Time taken for art_topk_iter: " << time_micro << "us, size of nodes: " << nodes.size() << ", filter_ids_length: " << filter_ids_length; }*/ return 0; } /* Writes 8 bytes to chars: n is split into 4 bytes, most significant first, and each byte into its high then low nibble, one nibble value (0..15) per output byte. symbols[] maps each nibble to itself. chars must have room for 8 bytes. */ void encode_int32(int32_t n, unsigned char *chars) { unsigned char symbols[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; unsigned char bytes[4]; bytes[0] = (unsigned char) ((n >> 24) & 0xFF); bytes[1] = (unsigned char) ((n >> 16) & 0xFF); bytes[2] = (unsigned char) ((n >> 8) & 0xFF); bytes[3] = (unsigned char) (n & 0xFF); for(uint32_t i = 0; i < 4; i++) { chars[2*i] = symbols[((bytes[i] >> 4) & 0x0F)]; chars[2*i+1] = symbols[(bytes[i] & 0x0F)]; } } /* Writes the 8 bytes of n to chars, most significant byte first. chars must have room for 8 bytes. */ void encode_int64(int64_t n, unsigned char *chars) { chars[0] = (unsigned char) ((n >> 56) & 0xFF); chars[1] = (unsigned char) ((n >> 48) & 0xFF); chars[2] = (unsigned char) ((n >> 40) & 0xFF); chars[3] = (unsigned char) ((n >> 32) & 0xFF); chars[4] = (unsigned char) ((n >> 24) & 0xFF); chars[5] = (unsigned char) ((n >> 16) & 0xFF); chars[6] = (unsigned char) ((n >> 8) & 0xFF); chars[7] = (unsigned char) (n & 0xFF); } /* Copies the bits of n into an int32, XORs them with ((i >> (digits - 1)) | INT32_MIN) and writes the result through encode_int32, so chars receives 8 bytes. Presumably this makes the encoded bytes sort in numeric order, as in the linked HBase code; confirm. */ // See: https://github.com/apache/hbase/blob/master/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java#L1372 void encode_float(float n, unsigned char
*chars) { int32_t i; memcpy(&i, &n, sizeof(int32_t)); i ^= ((i >> (std::numeric_limits::digits - 1)) | INT32_MIN); encode_int32(i, chars); } /* Compares key byte a with query byte b. Equal bytes give RECURSE for every comparator. LESS_THAN and LESS_THAN_EQUALS: a < b gives ITERATE, a > b gives ABORT. GREATER_THAN and GREATER_THAN_EQUALS: a > b gives ITERATE, a < b gives ABORT. EQUALS: any difference gives ABORT. An unknown comparator aborts the process. Strict and non-strict comparators are treated alike here; compare_and_match_leaf is where exact equality is excluded. */ // Implements ==, <= and >= recurse_progress matches(unsigned char a, unsigned char b, NUM_COMPARATOR comparator) { switch(comparator) { case LESS_THAN: case LESS_THAN_EQUALS: if (a == b) return RECURSE; else if(a < b) return ITERATE; return ABORT; case EQUALS: if(a == b) return RECURSE; return ABORT; case GREATER_THAN: case GREATER_THAN_EQUALS: if (a == b) return RECURSE; else if(a > b) return ITERATE; return ABORT; default: abort(); } } /* Visits every leaf below n and hands each to compare_and_match_leaf; no key bytes are compared during the descent. Inner nodes are walked in ascending slot or byte order; a null n is ignored and an unknown node type aborts. */ static void art_iter(const art_node *n, const unsigned char* int_str, int int_str_len, NUM_COMPARATOR comparator, std::vector &results) { // Handle base cases if (!n) return ; if (IS_LEAF(n)) { art_leaf *l = (art_leaf *) LEAF_RAW(n); compare_and_match_leaf(int_str, int_str_len, comparator, results, l); return ; } int idx; switch (n->type) { case NODE4: for (int i=0; i < n->num_children; i++) { art_iter(((art_node4 *) n)->children[i], int_str, int_str_len, comparator, results); } break; case NODE16: for (int i=0; i < n->num_children; i++) { art_iter(((art_node16 *) n)->children[i], int_str, int_str_len, comparator, results); } break; case NODE48: for (int i=0; i < 256; i++) { idx = ((art_node48*)n)->keys[i]; if (!idx) continue; art_iter(((art_node48 *) n)->children[idx - 1], int_str, int_str_len, comparator, results); } break; case NODE256: for (int i=0; i < 256; i++) { if (!((art_node256*)n)->children[i]) continue; art_iter(((art_node256 *) n)->children[i], int_str, int_str_len, comparator, results); } break; default: abort(); } return ; } /* For each child of inner node n (NODE4 and NODE16 from the last stored key down, NODE48 and NODE256 from byte 255 down), compares the child key byte with int_str[depth] through matches(): RECURSE continues with art_int_fuzzy_recurse at depth + 1, ITERATE collects the whole child subtree with art_iter, ABORT skips the child. NOTE(review): int_str[depth] is read without checking depth against int_str_len; confirm callers keep depth in range. */ static inline void art_int_fuzzy_children(const art_node *n, int depth, const unsigned char* int_str, int int_str_len, NUM_COMPARATOR comparator, std::vector &results) { unsigned char child_char; art_node* child; switch (n->type) { case NODE4: printf("\nNODE4\n"); for (int i=n->num_children-1; i >= 0; i--) { child_char = ((art_node4*)n)->keys[i];
printf("4!child_char: %c, %d, depth: %d\n", child_char, child_char, depth); child = ((art_node4*)n)->children[i]; recurse_progress progress = matches(child_char, int_str[depth], comparator); if(progress == RECURSE) { art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, comparator, results); } else if(progress == ITERATE) { art_iter(child, int_str, int_str_len, comparator, results); } } break; case NODE16: printf("\nNODE16\n"); for (int i=n->num_children-1; i >= 0; i--) { child_char = ((art_node16*)n)->keys[i]; printf("16!child_char: %c, depth: %d\n", child_char, depth); child = ((art_node16*)n)->children[i]; recurse_progress progress = matches(child_char, int_str[depth], comparator); if(progress == RECURSE) { art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, comparator, results); } else if(progress == ITERATE) { art_iter(child, int_str, int_str_len, comparator, results); } } break; case NODE48: printf("\nNODE48\n"); for (int i=255; i >= 0; i--) { int ix = ((art_node48*)n)->keys[i]; if (!ix) continue; child = ((art_node48*)n)->children[ix - 1]; child_char = (unsigned char)i; printf("48!child_char: %c, depth: %d, ix: %d\n", child_char, depth, ix); recurse_progress progress = matches(child_char, int_str[depth], comparator); if(progress == RECURSE) { art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, comparator, results); } else if(progress == ITERATE) { art_iter(child, int_str, int_str_len, comparator, results); } } break; case NODE256: printf("\nNODE256\n"); for (int i=255; i >= 0; i--) { if (!((art_node256*)n)->children[i]) continue; child_char = (unsigned char) i; printf("256!child_char: %c, depth: %d\n", child_char, depth); child = ((art_node256*)n)->children[i]; recurse_progress progress = matches(child_char, int_str[depth], comparator); if(progress == RECURSE) { art_int_fuzzy_recurse(child, depth+1, int_str, int_str_len, comparator, results); } else if(progress == ITERATE) { art_iter(child, int_str, int_str_len, comparator, results); }
} break; default: abort(); } } /* Comparator search below n, with int_str compared from offset depth. Leaf: key bytes are compared until int_str_len is reached; ABORT drops the leaf, ITERATE stops comparing early, and the leaf then goes to compare_and_match_leaf. Inner node: stored partial bytes are compared with int_str[depth + idx]; ABORT drops the subtree and ITERATE hands it to art_iter. Otherwise depth advances by the full n->partial_len (not the MAX_PREFIX_LEN capped value) and art_int_fuzzy_children takes over. */ void art_int_fuzzy_recurse(art_node *n, int depth, const unsigned char* int_str, int int_str_len, NUM_COMPARATOR comparator, std::vector &results) { if (!n) return ; if(IS_LEAF(n)) { art_leaf *l = (art_leaf *) LEAF_RAW(n); while(depth < int_str_len) { unsigned char c = l->key[depth]; recurse_progress progress = matches(c, int_str[depth], comparator); if(progress == ABORT) { return; } if(progress == ITERATE) { break; } depth++; } compare_and_match_leaf(int_str, int_str_len, comparator, results, l); return ; } const int partial_len = min(MAX_PREFIX_LEN, n->partial_len); const int end_index = min(partial_len, int_str_len); printf("\npartial_len: %d", partial_len); for(int idx=0; idxpartial[idx]; recurse_progress progress = matches(c, int_str[depth+idx], comparator); if(progress == ABORT) { return; } if(progress == ITERATE) { return art_iter(n, int_str, int_str_len, comparator, results); } } depth += n->partial_len; art_int_fuzzy_children(n, depth, int_str, int_str_len, comparator, results); } /* Appends leaf l to results. For LESS_THAN and GREATER_THAN the leaf is appended only if its key differs from int_str somewhere in the first l->key_len bytes, which leaves out an exact match; all other comparators append unconditionally. NOTE(review): int_str_len is unused and the loop runs to l->key_len, so int_str must be at least that long; confirm. */ void compare_and_match_leaf(const unsigned char *int_str, int int_str_len, const NUM_COMPARATOR &comparator, std::vector &results, const art_leaf *l) { if(comparator == LESS_THAN || comparator == GREATER_THAN) { for(uint32_t i = 0; i < l->key_len; i++) { if(int_str[i] != l->key[i]) { results.push_back(l); return ; } } } else { results.push_back(l); } } /* Encodes value with encode_int32 into an 8 byte key and runs art_int_fuzzy_recurse from the root with that key; matches are appended to results. Always returns 0. */ int art_int32_search(art_tree *t, int32_t value, NUM_COMPARATOR comparator, std::vector &results) { unsigned char chars[8]; encode_int32(value, chars); art_int_fuzzy_recurse(t->root, 0, chars, 8, comparator, results); return 0; } /* Encodes value with encode_int64 into an 8 byte key and runs art_int_fuzzy_recurse from the root with that key; matches are appended to results. Always returns 0. */ int art_int64_search(art_tree *t, int64_t value, NUM_COMPARATOR comparator, std::vector &results) { unsigned char chars[8]; encode_int64(value, chars); art_int_fuzzy_recurse(t->root, 0, chars, 8, comparator, results); return 0; } /* Encodes value with encode_float into an 8 byte key and runs art_int_fuzzy_recurse from the root with that key; matches are appended to results. Always returns 0. */ int art_float_search(art_tree *t, float value, NUM_COMPARATOR comparator, std::vector &results) { unsigned char chars[8]; encode_float(value, 
chars); art_int_fuzzy_recurse(t->root, 0, chars, 8, comparator, results); return 0; }