mirror of
https://github.com/typesense/typesense.git
synced 2025-05-22 23:06:30 +08:00
Improve fuzzy search filtering perf.
This commit is contained in:
parent
0626ca8cf6
commit
ef47f54d11
10
src/art.cpp
10
src/art.cpp
@ -950,6 +950,8 @@ int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_r
|
||||
|
||||
q.push(root);
|
||||
|
||||
size_t num_large_lists = 0;
|
||||
|
||||
while(!q.empty() && results.size() < max_results*4) {
|
||||
art_node *n = (art_node *) q.top();
|
||||
q.pop();
|
||||
@ -974,6 +976,10 @@ int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_r
|
||||
results.push_back(l);
|
||||
} else {
|
||||
// we will push leaf only if filter matches with leaf IDs
|
||||
if(!IS_COMPACT_POSTING(l->values)) {
|
||||
num_large_lists++;
|
||||
}
|
||||
|
||||
bool found_atleast_one = posting_t::contains_atleast_one(l->values, filter_ids, filter_ids_length);
|
||||
if(found_atleast_one) {
|
||||
results.push_back(l);
|
||||
@ -1024,7 +1030,9 @@ int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_r
|
||||
}
|
||||
}
|
||||
|
||||
LOG(INFO) << "leaf results.size: " << results.size();
|
||||
LOG(INFO) << "leaf results.size: " << results.size()
|
||||
<< ", filter_ids_length: " << filter_ids_length
|
||||
<< ", num_large_lists: " << num_large_lists;
|
||||
|
||||
printf("OUTSIDE art_topk_iter: results size: %d\n", results.size());
|
||||
return 0;
|
||||
|
@ -221,17 +221,23 @@ bool compact_posting_list_t::contains_atleast_one(const uint32_t* target_ids, si
|
||||
size_t num_existing_offsets = id_offsets[i];
|
||||
size_t existing_id = id_offsets[i + num_existing_offsets + 1];
|
||||
|
||||
if(existing_id == target_ids[target_ids_index]) {
|
||||
return true;
|
||||
// Returns iterator to the first element that is >= to value or last if no such element is found.
|
||||
size_t found_index = std::lower_bound(target_ids + target_ids_index,
|
||||
target_ids + target_ids_size, existing_id) - target_ids;
|
||||
|
||||
if(found_index == target_ids_size) {
|
||||
// all elements are lesser than lowest value (existing_id), so we can stop looking
|
||||
return false;
|
||||
} else {
|
||||
if(target_ids[found_index] == existing_id) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// advance the search lower bound to found_index, whose value is strictly greater than `existing_id`
|
||||
target_ids_index = found_index;
|
||||
}
|
||||
|
||||
if(target_ids[target_ids_index] < existing_id) {
|
||||
while(target_ids_index < target_ids_size && target_ids[target_ids_index] < existing_id) {
|
||||
target_ids_index++;
|
||||
}
|
||||
} else {
|
||||
i += num_existing_offsets + 2;
|
||||
}
|
||||
i += num_existing_offsets + 2;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -1325,6 +1325,12 @@ TEST_F(PostingListTest, CompactPostingListContainsAtleastOne) {
|
||||
ASSERT_TRUE(COMPACT_POSTING_PTR(obj)->contains_atleast_one(&target_ids3[0], target_ids3.size()));
|
||||
ASSERT_FALSE(COMPACT_POSTING_PTR(obj)->contains_atleast_one(&target_ids4[0], target_ids4.size()));
|
||||
|
||||
std::vector<uint32_t> target_ids5 = {2, 3};
|
||||
ASSERT_TRUE(COMPACT_POSTING_PTR(obj)->contains_atleast_one(&target_ids5[0], target_ids5.size()));
|
||||
|
||||
std::vector<uint32_t> target_ids6 = {0, 1, 2};
|
||||
ASSERT_FALSE(COMPACT_POSTING_PTR(obj)->contains_atleast_one(&target_ids6[0], target_ids6.size()));
|
||||
|
||||
posting_t::destroy_list(obj);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user