2021-01-29 13:57:52 -08:00

272 lines
6.4 KiB
C++

#ifndef ART_H
#define ART_H
#include <stdint.h>
#include <stdbool.h>
#include <vector>
#include "array.h"
#include "sorted_array.h"
#define IGNORE_PRINTF 1
#ifdef __cplusplus
extern "C" {
#endif
#define NODE4 1
#define NODE16 2
#define NODE48 3
#define NODE256 4
#define MAX_PREFIX_LEN 8
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#if defined(__GNUC__) && !defined(__clang__)
# if __STDC_VERSION__ >= 199901L && 402 == (__GNUC__ * 100 + __GNUC_MINOR__)
/*
* GCC 4.2.2's C99 inline keyword support is pretty broken; avoid. Introduced in
* GCC 4.2.something, fixed in 4.3.0. So checking for specific major.minor of
* 4.2 is fine.
*/
# define BROKEN_GCC_C99_INLINE
# endif
#endif
typedef int(*art_callback)(void *data, const unsigned char *key, uint32_t key_len, void *value);
/**
* This struct is included as part
* of all the various node sizes
*/
typedef struct {
uint8_t type;
uint8_t num_children;
uint8_t partial_len;
unsigned char partial[MAX_PREFIX_LEN];
} art_node;
/**
* Small node with only 4 children
*/
typedef struct {
art_node n;
unsigned char keys[4];
art_node *children[4];
} art_node4;
/**
* Node with 16 children
*/
typedef struct {
art_node n;
unsigned char keys[16];
art_node *children[16];
} art_node16;
/**
* Node with 48 children, but
* a full 256 byte field.
*/
typedef struct {
art_node n;
unsigned char keys[256];
art_node *children[48];
} art_node48;
/**
* Full node with 256 children
*/
typedef struct {
art_node n;
art_node *children[256];
} art_node256;
/**
* Container for holding the documents that belong to a leaf.
*/
typedef struct {
sorted_array ids;
sorted_array offset_index;
array offsets;
} art_values;
/**
* Represents a leaf. These are
* of arbitrary size, as they include the key.
*/
typedef struct {
uint32_t key_len;
int64_t max_score;
art_values* values;
unsigned char key[];
} art_leaf;
/**
* Main struct, points to root.
*/
typedef struct {
art_node *root;
uint64_t size;
} art_tree;
/*
* Represents a document to be indexed.
* `offsets` refer to the index locations where a token appeared in the document
*/
typedef struct {
int32_t score;
uint32_t id;
uint32_t offsets_len;
uint32_t* offsets;
} art_document;
enum token_ordering {
FREQUENCY,
MAX_SCORE
};
enum NUM_COMPARATOR {
LESS_THAN,
LESS_THAN_EQUALS,
EQUALS,
CONTAINS,
GREATER_THAN,
GREATER_THAN_EQUALS,
RANGE_INCLUSIVE
};
/**
* Initializes an ART tree
* @return 0 on success.
*/
int art_tree_init(art_tree *t);
/**
* DEPRECATED
* Initializes an ART tree
* @return 0 on success.
*/
#define init_art_tree(...) art_tree_init(__VA_ARGS__)
/**
* Destroys an ART tree
* @return 0 on success.
*/
int art_tree_destroy(art_tree *t);
/**
* DEPRECATED
* Initializes an ART tree
* @return 0 on success.
*/
#define destroy_art_tree(...) art_tree_destroy(__VA_ARGS__)
/**
* Returns the size of the ART tree.
*/
#ifdef BROKEN_GCC_C99_INLINE
# define art_size(t) ((t)->size)
#else
inline uint64_t art_size(art_tree *t) {
return t->size;
}
#endif
/**
* Inserts a new value into the ART tree
* @arg t The tree
* @arg key The key
* @arg key_len The length of the key
* @arg value Opaque value.
* @return NULL if the item was newly inserted, otherwise
* the old value pointer is returned.
*/
void* art_insert(art_tree *t, const unsigned char *key, int key_len, art_document* document, uint32_t num_hits);
/**
* Deletes a value from the ART tree
* @arg t The tree
* @arg key The key
* @arg key_len The length of the key
* @return NULL if the item was not found, otherwise
* the value pointer is returned.
*/
void* art_delete(art_tree *t, const unsigned char *key, int key_len);
/**
* Searches for a value in the ART tree
* @arg t The tree
* @arg key The key
* @arg key_len The length of the key
* @return NULL if the item was not found, otherwise
* the value pointer is returned.
*/
void* art_search(const art_tree *t, const unsigned char *key, int key_len);
/**
* Returns the minimum valued leaf
* @return The minimum leaf or NULL
*/
art_leaf* art_minimum(art_tree *t);
/**
* Returns the maximum valued leaf
* @return The maximum leaf or NULL
*/
art_leaf* art_maximum(art_tree *t);
/**
* Iterates through the entries pairs in the map,
* invoking a callback for each. The call back gets a
* key, value for each and returns an integer stop value.
* If the callback returns non-zero, then the iteration stops.
* @arg t The tree to iterate over
* @arg cb The callback function to invoke
* @arg data Opaque handle passed to the callback
* @return 0 on success, or the return of the callback.
*/
int art_iter(art_tree *t, art_callback cb, void *data);
/**
* Iterates through the entries pairs in the map,
* invoking a callback for each that matches a given prefix.
* The call back gets a key, value for each and returns an integer stop value.
* If the callback returns non-zero, then the iteration stops.
* @arg t The tree to iterate over
* @arg prefix The prefix of keys to read
* @arg prefix_len The length of the prefix
* @arg cb The callback function to invoke
* @arg data Opaque handle passed to the callback
* @return 0 on success, or the return of the callback.
*/
int art_iter_prefix(art_tree *t, const unsigned char *prefix, int prefix_len, art_callback cb, void *data);
/**
* Returns leaves that match a given string within a fuzzy distance of max_cost.
*/
int art_fuzzy_search(art_tree *t, const unsigned char *term, const int term_len, const int min_cost, const int max_cost,
const int max_words, const token_ordering token_order, const bool prefix,
const uint32_t *filter_ids, size_t filter_ids_length,
std::vector<art_leaf *> &results);
int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_results,
std::vector<art_leaf *> &results);
void encode_int32(int32_t n, unsigned char *chars);
void encode_int64(int64_t n, unsigned char *chars);
void encode_float(float n, unsigned char *chars);
int art_int32_search(art_tree *t, int32_t value, NUM_COMPARATOR comparator, std::vector<const art_leaf *> &results);
int art_int64_search(art_tree *t, int64_t value, NUM_COMPARATOR comparator, std::vector<const art_leaf *> &results);
int art_float_search(art_tree *t, float value, NUM_COMPARATOR comparator, std::vector<const art_leaf *> &results);
#ifdef __cplusplus
}
#endif
#endif