Optimize numerical filter ID consolidation.

This commit is contained in:
Kishore Nallan 2022-10-20 15:42:39 +05:30
parent 4808d1610a
commit 790fac008b
5 changed files with 29 additions and 20 deletions

View File

@ -150,6 +150,8 @@ public:
);
uint32_t* uncompress();
void uncompress(std::vector<uint32_t>& data);
};
template<class T>

View File

@ -102,6 +102,8 @@ public:
static void intersect(const std::vector<void*>& id_lists, std::vector<uint32_t>& result_ids);
static uint32_t* uncompress(void*& obj);
static void uncompress(void*& obj, std::vector<uint32_t>& ids);
};
template<class T>

View File

@ -626,6 +626,16 @@ bool id_list_t::take_id(result_iter_state_t& istate, uint32_t id) {
return true;
}
/**
 * Appends every ID produced by this list's iterator to the end of `data`.
 *
 * Existing contents of `data` are left in place, so the same vector may be
 * passed for several lists in turn to accumulate their IDs.
 *
 * @param data Destination vector; IDs are appended in iterator order.
 */
void id_list_t::uncompress(std::vector<uint32_t>& data) {
    // Requesting exactly size() + ids_length from reserve() on every call lets the
    // capacity grow only linearly when many lists are appended to one vector, which
    // can trigger a reallocation (and full copy) per call. Ask for at least double
    // the current capacity instead so repeated appends remain amortised.
    const size_t required = data.size() + ids_length;
    if(required > data.capacity()) {
        const size_t doubled = data.capacity() * 2;
        data.reserve(required > doubled ? required : doubled);
    }

    auto it = new_iterator();
    while(it.valid()) {
        data.push_back(it.id());
        it.next();
    }
}
uint32_t* id_list_t::uncompress() {
uint32_t* arr = new uint32_t[ids_length];
auto it = new_iterator();

View File

@ -370,6 +370,18 @@ uint32_t* ids_t::uncompress(void*& obj) {
}
}
void ids_t::uncompress(void*& obj, std::vector<uint32_t>& ids) {
if(IS_COMPACT_IDS(obj)) {
compact_id_list_t* list = COMPACT_IDS_PTR(obj);
for(size_t i = 0; i < list->length; i++) {
ids.push_back(list->ids[i]);
}
} else {
id_list_t* list = (id_list_t*)(obj);
list->uncompress(ids);
}
}
void ids_t::block_intersector_t::split_lists(size_t concurrency,
std::vector<std::vector<id_list_t::iterator_t>>& partial_its_vec) {
const size_t num_blocks = this->id_lists[0]->num_blocks();

View File

@ -89,18 +89,11 @@ void num_tree_t::search(NUM_COMPARATOR comparator, int64_t value, uint32_t** ids
std::vector<uint32_t> consolidated_ids;
while(iter_ge_value != int64map.end()) {
uint32_t* values = ids_t::uncompress(iter_ge_value->second);
for(size_t i = 0; i < ids_t::num_ids(iter_ge_value->second); i++) {
consolidated_ids.push_back(values[i]);
}
delete [] values;
ids_t::uncompress(iter_ge_value->second, consolidated_ids);
iter_ge_value++;
}
gfx::timsort(consolidated_ids.begin(), consolidated_ids.end());
consolidated_ids.erase(unique(consolidated_ids.begin(), consolidated_ids.end()), consolidated_ids.end());
uint32_t *out = nullptr;
@ -118,23 +111,13 @@ void num_tree_t::search(NUM_COMPARATOR comparator, int64_t value, uint32_t** ids
auto it = int64map.begin();
while(it != iter_ge_value) {
uint32_t* values = ids_t::uncompress(it->second);
for(size_t i = 0; i < ids_t::num_ids(it->second); i++) {
consolidated_ids.push_back(values[i]);
}
delete [] values;
ids_t::uncompress(it->second, consolidated_ids);
it++;
}
// for LESS_THAN_EQUALS, check if last iter entry is equal to value
if(it != int64map.end() && comparator == LESS_THAN_EQUALS && it->first == value) {
uint32_t* values = ids_t::uncompress(it->second);
for(size_t i = 0; i < ids_t::num_ids(it->second); i++) {
consolidated_ids.push_back(values[i]);
}
delete [] values;
ids_t::uncompress(it->second, consolidated_ids);
}
gfx::timsort(consolidated_ids.begin(), consolidated_ids.end());