mirror of
https://github.com/typesense/typesense.git
synced 2025-05-19 13:12:22 +08:00
Add contains_atleast_one helper for posting lists.
This commit is contained in:
parent
93261178a9
commit
c9fd7bc506
@ -34,6 +34,8 @@ struct compact_posting_list_t {
|
||||
uint32_t last_id();
|
||||
|
||||
uint32_t num_ids() const;
|
||||
|
||||
bool contains_atleast_one(const uint32_t* target_ids, size_t target_ids_size);
|
||||
};
|
||||
|
||||
class posting_t {
|
||||
@ -49,12 +51,16 @@ public:
|
||||
|
||||
static void erase(void*& obj, uint32_t id);
|
||||
|
||||
static void destroy_list(void*& obj);
|
||||
|
||||
static uint32_t num_ids(const void* obj);
|
||||
|
||||
static uint32_t first_id(const void* obj);
|
||||
|
||||
static bool contains(const void* obj, uint32_t id);
|
||||
|
||||
static bool contains_atleast_one(const void* obj, const uint32_t* target_ids, size_t target_ids_size);
|
||||
|
||||
static void merge(const std::vector<void*>& posting_lists, std::vector<uint32_t>& result_ids);
|
||||
|
||||
static void intersect(const std::vector<void*>& posting_lists, std::vector<uint32_t>& result_ids);
|
||||
|
@ -125,6 +125,8 @@ public:
|
||||
|
||||
bool contains(uint32_t id);
|
||||
|
||||
bool contains_atleast_one(const uint32_t* target_ids, size_t target_ids_size);
|
||||
|
||||
iterator_t new_iterator();
|
||||
|
||||
static void merge(const std::vector<posting_list_t*>& posting_lists, std::vector<uint32_t>& result_ids);
|
||||
|
@ -213,6 +213,30 @@ bool compact_posting_list_t::contains(uint32_t id) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool compact_posting_list_t::contains_atleast_one(const uint32_t* target_ids, size_t target_ids_size) {
|
||||
size_t i = 0;
|
||||
size_t target_ids_index = 0;
|
||||
|
||||
while(i < length && target_ids_index < target_ids_size) {
|
||||
size_t num_existing_offsets = id_offsets[i];
|
||||
size_t existing_id = id_offsets[i + num_existing_offsets + 1];
|
||||
|
||||
if(existing_id == target_ids[target_ids_index]) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if(target_ids[target_ids_index] < existing_id) {
|
||||
while(target_ids_index < target_ids_size && target_ids[target_ids_index] < existing_id) {
|
||||
target_ids_index++;
|
||||
}
|
||||
} else {
|
||||
i += num_existing_offsets + 2;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* posting operations */
|
||||
|
||||
void posting_t::upsert(void*& obj, uint32_t id, const std::vector<uint32_t>& offsets) {
|
||||
@ -324,6 +348,16 @@ bool posting_t::contains(const void* obj, uint32_t id) {
|
||||
}
|
||||
}
|
||||
|
||||
bool posting_t::contains_atleast_one(const void* obj, const uint32_t* target_ids, size_t target_ids_size) {
|
||||
if(IS_COMPACT_POSTING(obj)) {
|
||||
compact_posting_list_t* list = COMPACT_POSTING_PTR(obj);
|
||||
return list->contains_atleast_one(target_ids, target_ids_size);
|
||||
} else {
|
||||
posting_list_t* list = (posting_list_t*) RAW_POSTING_PTR(obj);
|
||||
return list->contains_atleast_one(target_ids, target_ids_size);
|
||||
}
|
||||
}
|
||||
|
||||
void posting_t::merge(const std::vector<void*>& raw_posting_lists, std::vector<uint32_t>& result_ids) {
|
||||
// we will have to convert the compact posting list (if any) to full form
|
||||
std::vector<posting_list_t*> plists;
|
||||
@ -384,3 +418,15 @@ bool posting_t::block_intersect(const std::vector<void*>& raw_posting_lists, siz
|
||||
|
||||
return done;
|
||||
}
|
||||
|
||||
void posting_t::destroy_list(void*& obj) {
|
||||
if(IS_COMPACT_POSTING(obj)) {
|
||||
compact_posting_list_t* list = COMPACT_POSTING_PTR(obj);
|
||||
free(list); // assigned via malloc, so must be free()d
|
||||
} else {
|
||||
posting_list_t* list = (posting_list_t*) RAW_POSTING_PTR(obj);
|
||||
delete list;
|
||||
}
|
||||
|
||||
obj = nullptr;
|
||||
}
|
||||
|
@ -886,6 +886,30 @@ bool posting_list_t::contains(uint32_t id) {
|
||||
return potential_block->contains(id);
|
||||
}
|
||||
|
||||
bool posting_list_t::contains_atleast_one(const uint32_t* target_ids, size_t target_ids_size) {
|
||||
posting_list_t::iterator_t it = new_iterator();
|
||||
size_t target_ids_index = 0;
|
||||
|
||||
while(target_ids_index < target_ids_size && it.valid()) {
|
||||
uint32_t id = it.id();
|
||||
|
||||
if(id == target_ids[target_ids_index]) {
|
||||
return true;
|
||||
} else {
|
||||
// advance smallest value
|
||||
if(id > target_ids[target_ids_index]) {
|
||||
while(target_ids_index < target_ids_size && target_ids[target_ids_index] < id) {
|
||||
target_ids_index++;
|
||||
}
|
||||
} else {
|
||||
it.skip_to(target_ids[target_ids_index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* iterator_t operations */
|
||||
|
||||
posting_list_t::iterator_t::iterator_t(posting_list_t::block_t* root):
|
||||
@ -956,5 +980,6 @@ posting_list_t::iterator_t::iterator_t(iterator_t&& rhs) noexcept {
|
||||
ids = rhs.ids;
|
||||
|
||||
rhs.curr_block = nullptr;
|
||||
rhs.uncompressed_block = nullptr;
|
||||
rhs.ids = nullptr;
|
||||
}
|
||||
|
@ -738,6 +738,42 @@ TEST(PostingListTest, IntersectionSkipBlocks) {
|
||||
delete [] final_results;
|
||||
}
|
||||
|
||||
// Exercises posting_list_t::contains_atleast_one() in two shapes: a posting list
// much longer than the target array, and a short posting list probed with a long
// target array.
TEST(PostingListTest, PostingListContainsAtleastOne) {
    // --- posting list larger than the target IDs: holds ids 20..999 ---
    posting_list_t large_list(100);

    for(size_t id = 20; id < 1000; id++) {
        large_list.upsert(id, {1, 2, 3});
    }

    std::vector<uint32_t> both_present = {200, 300};
    std::vector<uint32_t> one_present = {200, 3000};
    std::vector<uint32_t> none_present = {2000, 3000};

    ASSERT_TRUE(large_list.contains_atleast_one(both_present.data(), both_present.size()));
    ASSERT_TRUE(large_list.contains_atleast_one(one_present.data(), one_present.size()));
    ASSERT_FALSE(large_list.contains_atleast_one(none_present.data(), none_present.size()));

    // --- posting list smaller than the target IDs: holds ids 10..19 ---
    posting_list_t small_list(2);

    for(size_t id = 10; id < 20; id++) {
        small_list.upsert(id, {1, 2, 3});
    }

    // targets 5..999 overlap the list
    std::vector<uint32_t> overlapping_targets;
    for(size_t id = 5; id < 1000; id++) {
        overlapping_targets.push_back(id);
    }

    // targets 25..999 start past the list's last id
    std::vector<uint32_t> disjoint_targets;
    for(size_t id = 25; id < 1000; id++) {
        disjoint_targets.push_back(id);
    }

    ASSERT_TRUE(small_list.contains_atleast_one(overlapping_targets.data(), overlapping_targets.size()));
    ASSERT_FALSE(small_list.contains_atleast_one(disjoint_targets.data(), disjoint_targets.size()));
}
|
||||
|
||||
TEST(PostingListTest, CompactPostingListUpsertAppends) {
|
||||
uint32_t ids[] = {0, 1000, 1002};
|
||||
uint32_t offset_index[] = {0, 3, 6};
|
||||
@ -978,6 +1014,29 @@ TEST(PostingListTest, CompactPostingListErase) {
|
||||
free(list);
|
||||
}
|
||||
|
||||
// Exercises compact_posting_list_t::contains_atleast_one() on a list built with
// create() and on a single-ID list built through posting_t::upsert().
TEST(PostingListTest, CompactPostingListContainsAtleastOne) {
    uint32_t ids[] = {5, 6, 7, 8};
    uint32_t offset_index[] = {0, 3, 6, 9};
    uint32_t offsets[] = {0, 3, 4, 0, 3, 4, 0, 3, 4, 0, 3, 4};

    std::vector<uint32_t> target_ids1 = {4, 7, 11};     // 7 is stored
    std::vector<uint32_t> target_ids2 = {2, 3, 4, 20};  // nothing in common with 5..8

    compact_posting_list_t* list1 = compact_posting_list_t::create(4, ids, offset_index, 12, offsets);
    ASSERT_TRUE(list1->contains_atleast_one(&target_ids1[0], target_ids1.size()));
    ASSERT_FALSE(list1->contains_atleast_one(&target_ids2[0], target_ids2.size()));

    // release the list the same way the other compact-list tests in this file do
    free(list1);

    // calloc() instead of malloc(): the allocation bypasses any constructor, so with
    // malloc() the list's header fields would be indeterminate when the list is
    // handed to posting_t::upsert(). Zero-filled memory gives an empty list.
    compact_posting_list_t* list2 = static_cast<compact_posting_list_t*>(calloc(1, sizeof(compact_posting_list_t)));
    ASSERT_NE(nullptr, list2);

    void* obj = SET_COMPACT_POSTING(list2);
    posting_t::upsert(obj, 3, {1, 5});

    std::vector<uint32_t> target_ids3 = {1, 2, 3, 4, 100};  // 3 is stored
    std::vector<uint32_t> target_ids4 = {4, 5, 6, 100};     // 3 is absent

    ASSERT_TRUE(COMPACT_POSTING_PTR(obj)->contains_atleast_one(&target_ids3[0], target_ids3.size()));
    ASSERT_FALSE(COMPACT_POSTING_PTR(obj)->contains_atleast_one(&target_ids4[0], target_ids4.size()));

    // upsert() takes `obj` by reference and may have replaced the allocation, so
    // release whatever `obj` refers to now rather than the original `list2`.
    posting_t::destroy_list(obj);
}
|
||||
|
||||
TEST(PostingListTest, DISABLED_Benchmark) {
|
||||
std::vector<uint32_t> offsets = {0, 1, 3};
|
||||
posting_list_t pl(4096);
|
||||
|
Loading…
x
Reference in New Issue
Block a user