diff --git a/src/art.cpp b/src/art.cpp index 311e1993..bb373813 100644 --- a/src/art.cpp +++ b/src/art.cpp @@ -950,6 +950,8 @@ int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_r q.push(root); + size_t num_large_lists = 0; + while(!q.empty() && results.size() < max_results*4) { art_node *n = (art_node *) q.top(); q.pop(); @@ -974,6 +976,10 @@ int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_r results.push_back(l); } else { // we will push leaf only if filter matches with leaf IDs + if(!IS_COMPACT_POSTING(l->values)) { + num_large_lists++; + } + bool found_atleast_one = posting_t::contains_atleast_one(l->values, filter_ids, filter_ids_length); if(found_atleast_one) { results.push_back(l); @@ -1024,7 +1030,9 @@ int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_r } } - LOG(INFO) << "leaf results.size: " << results.size(); + LOG(INFO) << "leaf results.size: " << results.size() + << ", filter_ids_length: " << filter_ids_length + << ", num_large_lists: " << num_large_lists; printf("OUTSIDE art_topk_iter: results size: %d\n", results.size()); return 0; diff --git a/src/posting.cpp b/src/posting.cpp index 4ec4ab00..14484af0 100644 --- a/src/posting.cpp +++ b/src/posting.cpp @@ -221,17 +221,23 @@ bool compact_posting_list_t::contains_atleast_one(const uint32_t* target_ids, si size_t num_existing_offsets = id_offsets[i]; size_t existing_id = id_offsets[i + num_existing_offsets + 1]; - if(existing_id == target_ids[target_ids_index]) { - return true; + // Returns iterator to the first element that is >= to value or last if no such element is found. 
+ size_t found_index = std::lower_bound(target_ids + target_ids_index, + target_ids + target_ids_size, existing_id) - target_ids; + + if(found_index == target_ids_size) { + // all remaining target ids are less than `existing_id`, so we can stop looking + return false; + } else { + if(target_ids[found_index] == existing_id) { + return true; + } + + // advance the search start to found_index, the first target id that is >= `existing_id` + target_ids_index = found_index; } - if(target_ids[target_ids_index] < existing_id) { - while(target_ids_index < target_ids_size && target_ids[target_ids_index] < existing_id) { - target_ids_index++; - } - } else { - i += num_existing_offsets + 2; - } + i += num_existing_offsets + 2; } return false; diff --git a/test/posting_list_test.cpp b/test/posting_list_test.cpp index d28db670..09c567ef 100644 --- a/test/posting_list_test.cpp +++ b/test/posting_list_test.cpp @@ -1325,6 +1325,12 @@ TEST_F(PostingListTest, CompactPostingListContainsAtleastOne) { ASSERT_TRUE(COMPACT_POSTING_PTR(obj)->contains_atleast_one(&target_ids3[0], target_ids3.size())); ASSERT_FALSE(COMPACT_POSTING_PTR(obj)->contains_atleast_one(&target_ids4[0], target_ids4.size())); + std::vector target_ids5 = {2, 3}; + ASSERT_TRUE(COMPACT_POSTING_PTR(obj)->contains_atleast_one(&target_ids5[0], target_ids5.size())); + + std::vector target_ids6 = {0, 1, 2}; + ASSERT_FALSE(COMPACT_POSTING_PTR(obj)->contains_atleast_one(&target_ids6[0], target_ids6.size())); + posting_t::destroy_list(obj); }