Wrapper for posting list intersection.

This commit is contained in:
Kishore Nallan 2021-06-13 09:51:46 +05:30
parent 0e967a9c44
commit d3c8cb442e
4 changed files with 63 additions and 9 deletions

View File

@ -28,6 +28,7 @@ struct compact_posting_list_t {
void erase(uint32_t id);
uint32_t first_id();
uint32_t last_id();
uint32_t num_ids() const;
@ -43,5 +44,9 @@ public:
static void erase(void*& obj, uint32_t id);
static uint32_t num_ids(void*& obj);
static uint32_t num_ids(const void* obj);
static uint32_t first_id(const void* obj);
static void intersect(const std::vector<void*>& posting_lists, std::vector<uint32_t>& result_ids);
};

View File

@ -102,10 +102,11 @@ public:
size_t num_ids();
uint32_t first_id();
block_t* block_of(last_id_t id);
iterator_t new_iterator();
static posting_list_t* intersect(const std::vector<posting_list_t*>& posting_lists,
std::vector<uint32_t>& result_ids);
static void intersect(const std::vector<posting_list_t*>& posting_lists, std::vector<uint32_t>& result_ids);
};

View File

@ -184,6 +184,14 @@ uint32_t compact_posting_list_t::num_ids() const {
return ids_length;
}
/**
 * Returns the first ID held by this compact posting list.
 *
 * @return the value stored at index `id_offsets[0] + 1`, or 0 when `length` is 0.
 */
uint32_t compact_posting_list_t::first_id() {
    if(length != 0) {
        // NOTE(review): id_offsets[0] presumably holds the number of offsets recorded for the first
        // entry, which would place that entry's ID right after them -- confirm against the writer side.
        const auto first_id_index = id_offsets[0] + 1;
        return id_offsets[first_id_index];
    }

    // nothing stored yet
    return 0;
}
/* posting operations */
void posting_t::upsert(void*& obj, uint32_t id, const std::vector<uint32_t>& offsets) {
@ -265,12 +273,47 @@ void posting_t::erase(void*& obj, uint32_t id) {
}
}
uint32_t posting_t::num_ids(void*& obj) {
uint32_t posting_t::num_ids(const void* obj) {
if(IS_COMPACT_POSTING(obj)) {
compact_posting_list_t* list = (compact_posting_list_t*) RAW_POSTING_PTR(obj);
compact_posting_list_t* list = COMPACT_POSTING_PTR(obj);
return list->num_ids();
} else {
posting_list_t* list = (posting_list_t*) RAW_POSTING_PTR(obj);
return list->num_ids();
}
}
uint32_t posting_t::first_id(const void* obj) {
if(IS_COMPACT_POSTING(obj)) {
compact_posting_list_t* list = COMPACT_POSTING_PTR(obj);
return list->first_id();
} else {
posting_list_t* list = (posting_list_t*) RAW_POSTING_PTR(obj);
return list->first_id();
}
}
void posting_t::intersect(const std::vector<void*>& raw_posting_lists, std::vector<uint32_t>& result_ids) {
// we will have to convert the compact posting list (if any) to full form
std::vector<posting_list_t*> plists;
std::vector<uint32_t> expanded_plist_indices;
for(size_t i = 0; i < raw_posting_lists.size(); i++) {
auto raw_posting_list = raw_posting_lists[i];
if(IS_COMPACT_POSTING(raw_posting_list)) {
auto compact_posting_list = COMPACT_POSTING_PTR(raw_posting_list);
plists.emplace_back(compact_posting_list->to_full_posting_list());
expanded_plist_indices.push_back(i);
} else {
posting_list_t* full_posting_list = (posting_list_t*) RAW_POSTING_PTR(raw_posting_list);
plists.emplace_back(full_posting_list);
}
}
posting_list_t::intersect(plists, result_ids);
for(uint32_t expanded_plist_index: expanded_plist_indices) {
delete plists[expanded_plist_index];
}
}

View File

@ -514,6 +514,14 @@ size_t posting_list_t::num_blocks() {
return id_block_map.size();
}
/**
 * Returns the first ID of this posting list.
 *
 * @return the element at position 0 of the root block's `ids`, or 0 when `ids_length` is 0.
 */
uint32_t posting_list_t::first_id() {
    if(ids_length != 0) {
        return root_block.ids.at(0);
    }

    // empty list
    return 0;
}
posting_list_t::block_t* posting_list_t::block_of(last_id_t id) {
auto it = id_block_map.find(id);
if(it != id_block_map.end()) {
@ -523,8 +531,7 @@ posting_list_t::block_t* posting_list_t::block_of(last_id_t id) {
}
// Inspired by: https://stackoverflow.com/a/25509185/131050
posting_list_t* posting_list_t::intersect(const std::vector<posting_list_t*>& posting_lists,
std::vector<uint32_t>& result_ids) {
void posting_list_t::intersect(const std::vector<posting_list_t*>& posting_lists, std::vector<uint32_t>& result_ids) {
auto its = std::vector<posting_list_t::iterator_t>();
its.reserve(posting_lists.size());
@ -557,8 +564,6 @@ posting_list_t* posting_list_t::intersect(const std::vector<posting_list_t*>& po
}
}
}
return nullptr;
}
bool posting_list_t::at_end(const std::vector<posting_list_t::iterator_t>& its) {