mirror of
https://github.com/typesense/typesense.git
synced 2025-05-15 02:03:19 +08:00
Adding a max_score field to intermediate nodes that denotes the maximum score of leaf nodes.
This is useful for pruning search space when we want to identify top-K matches for a given prefix.
This commit is contained in:
parent
8f91f11cb1
commit
5246a1683d
@ -39,6 +39,7 @@ typedef struct {
|
||||
uint8_t num_children;
|
||||
uint32_t partial_len;
|
||||
unsigned char partial[MAX_PREFIX_LEN];
|
||||
uint16_t max_score;
|
||||
} art_node;
|
||||
|
||||
/**
|
||||
@ -83,6 +84,7 @@ typedef struct {
|
||||
*/
|
||||
typedef struct {
|
||||
void *value; /* caller-supplied pointer stored with the key; replaced when the same key is re-inserted */
|
||||
uint16_t score; /* score passed to art_insert, narrowed from int to uint16_t when the leaf is created or updated */
|
||||
uint32_t key_len; /* number of bytes held in key[] */
|
||||
unsigned char key[]; /* flexible array member: make_leaf allocates sizeof(art_leaf)+key_len and copies exactly key_len bytes here */
|
||||
} art_leaf;
|
||||
@ -141,7 +143,7 @@ inline uint64_t art_size(art_tree *t) {
|
||||
* @return NULL if the item was newly inserted, otherwise
|
||||
* the old value pointer is returned.
|
||||
*/
|
||||
void* art_insert(art_tree *t, const unsigned char *key, int key_len, void *value);
|
||||
void* art_insert(art_tree *t, const unsigned char *key, int key_len, int score, void *value);
|
||||
|
||||
/**
|
||||
* Deletes a value from the ART tree
|
||||
|
24
src/art.c
24
src/art.c
@ -345,9 +345,10 @@ art_leaf* art_maximum(art_tree *t) {
|
||||
return maximum((art_node*)t->root);
|
||||
}
|
||||
|
||||
static art_leaf* make_leaf(const unsigned char *key, int key_len, void *value) {
|
||||
static art_leaf* make_leaf(const unsigned char *key, int key_len, int score, void *value) {
|
||||
art_leaf *l = malloc(sizeof(art_leaf)+key_len);
|
||||
l->value = value;
|
||||
l->score = (uint16_t) score;
|
||||
l->key_len = key_len;
|
||||
memcpy(l->key, key, key_len);
|
||||
return l;
|
||||
@ -510,10 +511,10 @@ static int prefix_mismatch(const art_node *n, const unsigned char *key, int key_
|
||||
return idx;
|
||||
}
|
||||
|
||||
static void* recursive_insert(art_node *n, art_node **ref, const unsigned char *key, int key_len, void *value, int depth, int *old) {
|
||||
static void* recursive_insert(art_node *n, art_node **ref, const unsigned char *key, int key_len, int score, void *value, int depth, int *old) {
|
||||
// If we are at a NULL node, inject a leaf
|
||||
if (!n) {
|
||||
*ref = (art_node*)SET_LEAF(make_leaf(key, key_len, value));
|
||||
*ref = (art_node*)SET_LEAF(make_leaf(key, key_len, score, value));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -526,6 +527,7 @@ static void* recursive_insert(art_node *n, art_node **ref, const unsigned char *
|
||||
*old = 1;
|
||||
void *old_val = l->value;
|
||||
l->value = value;
|
||||
l->score = (uint16_t) score;
|
||||
return old_val;
|
||||
}
|
||||
|
||||
@ -533,7 +535,7 @@ static void* recursive_insert(art_node *n, art_node **ref, const unsigned char *
|
||||
art_node4 *new = (art_node4*)alloc_node(NODE4);
|
||||
|
||||
// Create a new leaf
|
||||
art_leaf *l2 = make_leaf(key, key_len, value);
|
||||
art_leaf *l2 = make_leaf(key, key_len, score, value);
|
||||
|
||||
// Determine longest prefix
|
||||
int longest_prefix = longest_common_prefix(l, l2, depth);
|
||||
@ -551,6 +553,7 @@ static void* recursive_insert(art_node *n, art_node **ref, const unsigned char *
|
||||
// Determine if the prefixes differ, since we need to split
|
||||
int prefix_diff = prefix_mismatch(n, key, key_len, depth);
|
||||
if ((uint32_t)prefix_diff >= n->partial_len) {
|
||||
n->max_score = (uint16_t) min(n->max_score, score);
|
||||
depth += n->partial_len;
|
||||
goto RECURSE_SEARCH;
|
||||
}
|
||||
@ -559,6 +562,7 @@ static void* recursive_insert(art_node *n, art_node **ref, const unsigned char *
|
||||
art_node4 *new = (art_node4*)alloc_node(NODE4);
|
||||
*ref = (art_node*)new;
|
||||
new->n.partial_len = prefix_diff;
|
||||
new->n.max_score = (uint16_t) score;
|
||||
memcpy(new->n.partial, n->partial, min(MAX_PREFIX_LEN, prefix_diff));
|
||||
|
||||
// Adjust the prefix of the old node
|
||||
@ -575,8 +579,10 @@ static void* recursive_insert(art_node *n, art_node **ref, const unsigned char *
|
||||
min(MAX_PREFIX_LEN, n->partial_len));
|
||||
}
|
||||
|
||||
n->max_score = (uint16_t) min(n->max_score, score);
|
||||
|
||||
// Insert the new leaf
|
||||
art_leaf *l = make_leaf(key, key_len, value);
|
||||
art_leaf *l = make_leaf(key, key_len, score, value);
|
||||
add_child4(new, ref, key[depth+prefix_diff], SET_LEAF(l));
|
||||
return NULL;
|
||||
}
|
||||
@ -586,11 +592,11 @@ static void* recursive_insert(art_node *n, art_node **ref, const unsigned char *
|
||||
// Find a child to recurse to
|
||||
art_node **child = find_child(n, key[depth]);
|
||||
if (child) {
|
||||
return recursive_insert(*child, child, key, key_len, value, depth+1, old);
|
||||
return recursive_insert(*child, child, key, key_len, score, value, depth+1, old);
|
||||
}
|
||||
|
||||
// No child, node goes within us
|
||||
art_leaf *l = make_leaf(key, key_len, value);
|
||||
art_leaf *l = make_leaf(key, key_len, score, value);
|
||||
add_child(n, ref, key[depth], SET_LEAF(l));
|
||||
return NULL;
|
||||
}
|
||||
@ -604,9 +610,9 @@ static void* recursive_insert(art_node *n, art_node **ref, const unsigned char *
|
||||
* @return NULL if the item was newly inserted, otherwise
|
||||
* the old value pointer is returned.
|
||||
*/
|
||||
void* art_insert(art_tree *t, const unsigned char *key, int key_len, void *value) {
|
||||
void* art_insert(art_tree *t, const unsigned char *key, int key_len, int score, void *value) {
|
||||
int old_val = 0;
|
||||
void *old = recursive_insert(t->root, &t->root, key, key_len, value, 0, &old_val);
|
||||
void *old = recursive_insert(t->root, &t->root, key, key_len, score, value, 0, &old_val);
|
||||
if (!old_val) t->size++;
|
||||
return old;
|
||||
}
|
||||
|
@ -26,11 +26,11 @@ int main() {
|
||||
|
||||
while (std::getline(infile, line)) {
|
||||
//cout << "Line: " << line << ", number = " << line << endl;
|
||||
art_insert(&t, (const unsigned char *) line.c_str(), line.length(), (void*)num);
|
||||
art_insert(&t, (const unsigned char *) line.c_str(), line.length(), line.length(), (void*)num);
|
||||
num++;
|
||||
}
|
||||
|
||||
const unsigned char *prefix = (const unsigned char *) "amzfing";
|
||||
const unsigned char *prefix = (const unsigned char *) "ama";
|
||||
art_iter_fuzzy_prefix(&t, prefix, strlen((const char *) prefix), 2, test_prefix_cb, NULL);
|
||||
// art_iter_prefix(&t, prefix, strlen((const char *) prefix), test_prefix_cb, NULL);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user