Undo id list merge using priority queue.

This commit is contained in:
Harpreet Sangar 2023-05-16 18:16:43 +05:30
parent 548aee4f99
commit ef77b58f2b
3 changed files with 9 additions and 157 deletions

View File

@ -30,11 +30,6 @@ public:
// Range lookup over keys between `start` and `end`. The resulting id array is written
// through `ids`.
// NOTE(review): `ids_len` presumably receives the number of ids written - confirm against
// the definition.
void range_inclusive_search(int64_t start, int64_t end, uint32_t** ids, size_t& ids_len);
// Iterator flavour of the range lookup: appends one id list iterator to `id_list_iterators`
// for every stored key k with start <= k <= end. Id lists reported by
// ids_t::to_expanded_id_lists are returned through `expanded_id_lists`.
// NOTE(review): the caller appears to be responsible for deleting the entries of
// `expanded_id_lists` - confirm.
void range_inclusive_search_iterators(int64_t start,
int64_t end,
std::vector<id_list_t::iterator_t>& id_list_iterators,
std::vector<id_list_t*>& expanded_id_lists);
// Leaves `ids_len` untouched when the tree is empty.
// NOTE(review): otherwise presumably stores an approximate count of ids whose key lies in
// [start, end] - confirm against the definition.
void approx_range_inclusive_search_count(int64_t start, int64_t end, uint32_t& ids_len);
void range_inclusive_contains(const int64_t& start, const int64_t& end,
@ -47,11 +42,6 @@ public:
// Comparator based lookup against `value`.
// NOTE(review): presumably writes the matching ids through `ids` / `ids_len` in the same way
// as range_inclusive_search - confirm against the definition.
void search(NUM_COMPARATOR comparator, int64_t value, uint32_t** ids, size_t& ids_len);
// Iterator flavour of search(): appends one id list iterator to `id_list_iterators` for every
// stored key that satisfies `comparator` relative to `value`. EQUALS, GREATER_THAN,
// GREATER_THAN_EQUALS, LESS_THAN and LESS_THAN_EQUALS select keys; any other comparator
// selects nothing. Id lists reported by ids_t::to_expanded_id_lists are returned through
// `expanded_id_lists`.
void search_iterators(NUM_COMPARATOR comparator,
int64_t value,
std::vector<id_list_t::iterator_t>& id_list_iterators,
std::vector<id_list_t*>& expanded_id_lists);
// Leaves `ids_len` untouched when the tree is empty.
// NOTE(review): otherwise presumably stores an approximate count of matching ids - confirm.
void approx_search_count(NUM_COMPARATOR comparator, int64_t value, uint32_t& ids_len);
// NOTE(review): definition not visible here; presumably detaches `id` from the entry stored
// under `value` - confirm.
void remove(uint64_t value, uint32_t id);
@ -65,9 +55,4 @@ public:
uint32_t* const& context_ids,
size_t& result_ids_len,
uint32_t*& result_ids) const;
// Merges the ids produced by `id_list_iterators` into a single array in which consecutive
// duplicates are collapsed. The iterators are advanced until they are no longer valid.
// `result_ids` is pointed at a buffer allocated with new[] (its previous value is overwritten
// without being freed) and `result_ids_len` is set to the number of ids in that buffer.
// `comparator` is accepted but not consulted by the implementation.
void merge_id_list_iterators(std::vector<id_list_t::iterator_t>& id_list_iterators,
const NUM_COMPARATOR &comparator,
uint32_t*& result_ids,
uint32_t& result_ids_len) const;
};

View File

@ -651,40 +651,22 @@ void filter_result_iterator_t::init() {
for (size_t fi = 0; fi < a_filter.values.size(); fi++) {
const std::string& filter_value = a_filter.values[fi];
int64_t value = (int64_t)std::stol(filter_value);
std::vector<id_list_t::iterator_t> id_list_iterators;
std::vector<id_list_t*> expanded_id_lists;
size_t result_size = filter_result.count;
if (a_filter.comparators[fi] == RANGE_INCLUSIVE && fi+1 < a_filter.values.size()) {
const std::string& next_filter_value = a_filter.values[fi + 1];
auto const range_end_value = (int64_t)std::stol(next_filter_value);
num_tree->range_inclusive_search_iterators(value, range_end_value, id_list_iterators, expanded_id_lists);
num_tree->range_inclusive_search(value, range_end_value, &filter_result.docs, result_size);
fi++;
} else if (a_filter.comparators[fi] == NOT_EQUALS) {
numeric_not_equals_filter(num_tree, value,
index->seq_ids->uncompress(), index->seq_ids->num_ids(),
filter_result.docs, result_size);
} else {
num_tree->search_iterators(a_filter.comparators[fi] == NOT_EQUALS ? EQUALS : a_filter.comparators[fi],
value, id_list_iterators, expanded_id_lists);
num_tree->search(a_filter.comparators[fi], value, &filter_result.docs, result_size);
}
uint32_t* filter_match_ids = nullptr;
uint32_t filter_ids_length;
num_tree->merge_id_list_iterators(id_list_iterators, a_filter.comparators[fi],
filter_match_ids, filter_ids_length);
if (a_filter.comparators[fi] == NOT_EQUALS) {
apply_not_equals(index->seq_ids->uncompress(), index->seq_ids->num_ids(),
filter_match_ids, filter_ids_length);
}
uint32_t *out = nullptr;
filter_result.count = ArrayUtils::or_scalar(filter_match_ids, filter_ids_length,
filter_result.docs, filter_result.count, &out);
delete [] filter_match_ids;
delete [] filter_result.docs;
filter_result.docs = out;
for(id_list_t* expanded_id_list: expanded_id_lists) {
delete expanded_id_list;
}
filter_result.count = result_size;
}
if (a_filter.apply_not_equals) {
@ -1400,7 +1382,7 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n,
}
// Builds an iterator that only carries an approximate id count.
//
// `approx_filter_ids_length` is stored as-is. A fresh `filter_node_t(AND, nullptr, nullptr)`
// is allocated for `filter_node`, and `delete_filter_node` is set so that the node allocated
// here can be told apart from one supplied by a caller.
// NOTE(review): the matching `delete` is expected to live in the destructor, which is not
// visible here - confirm.
filter_result_iterator_t::filter_result_iterator_t(uint32_t approx_filter_ids_length) :
        approx_filter_ids_length(approx_filter_ids_length) {
    filter_node = new filter_node_t(AND, nullptr, nullptr);
    delete_filter_node = true;
}

View File

@ -43,30 +43,6 @@ void num_tree_t::range_inclusive_search(int64_t start, int64_t end, uint32_t** i
*ids = out;
}
// Collects one id list iterator for every key inside the closed range [start, end].
//
// The iterators are appended to `id_list_iterators`. Id lists that
// ids_t::to_expanded_id_lists reports through `expanded_id_lists` are passed back to the
// caller unchanged. Nothing is appended when the tree holds no keys.
void num_tree_t::range_inclusive_search_iterators(int64_t start,
                                                  int64_t end,
                                                  std::vector<id_list_t::iterator_t>& id_list_iterators,
                                                  std::vector<id_list_t*>& expanded_id_lists) {
    if (int64map.empty()) {
        return;
    }

    // lower_bound() yields the first key >= start; keep walking while the key is still <= end.
    std::vector<void*> lists_in_range;
    for (auto entry = int64map.lower_bound(start);
         entry != int64map.end() && entry->first <= end;
         ++entry) {
        lists_in_range.push_back(entry->second);
    }

    std::vector<id_list_t*> resolved_lists;
    ids_t::to_expanded_id_lists(lists_in_range, resolved_lists, expanded_id_lists);

    for (auto* resolved_list : resolved_lists) {
        id_list_iterators.emplace_back(resolved_list->new_iterator());
    }
}
void num_tree_t::approx_range_inclusive_search_count(int64_t start, int64_t end, uint32_t& ids_len) {
if (int64map.empty()) {
return;
@ -211,60 +187,6 @@ void num_tree_t::search(NUM_COMPARATOR comparator, int64_t value, uint32_t** ids
}
}
// Collects one id list iterator for every key that satisfies `comparator` against `value`.
//
// Handled comparators are EQUALS, GREATER_THAN, GREATER_THAN_EQUALS, LESS_THAN and
// LESS_THAN_EQUALS; any other comparator selects no keys. The iterators are appended to
// `id_list_iterators`, and id lists that ids_t::to_expanded_id_lists reports through
// `expanded_id_lists` are passed back to the caller unchanged.
void num_tree_t::search_iterators(NUM_COMPARATOR comparator,
                                  int64_t value,
                                  std::vector<id_list_t::iterator_t>& id_list_iterators,
                                  std::vector<id_list_t*>& expanded_id_lists) {
    if (int64map.empty()) {
        return;
    }

    std::vector<void*> matched_lists;

    switch (comparator) {
        case EQUALS: {
            const auto exact = int64map.find(value);
            if (exact != int64map.end()) {
                matched_lists.emplace_back(exact->second);
            }
            break;
        }

        case GREATER_THAN:
        case GREATER_THAN_EQUALS: {
            // First key >= value, or end() when every key is smaller than value.
            auto cursor = int64map.lower_bound(value);
            if (cursor == int64map.end()) {
                return;
            }

            // A strict comparison must not include the key equal to value.
            if (comparator == GREATER_THAN && cursor->first == value) {
                ++cursor;
            }

            for (; cursor != int64map.end(); ++cursor) {
                matched_lists.emplace_back(cursor->second);
            }
            break;
        }

        case LESS_THAN:
        case LESS_THAN_EQUALS: {
            // First key >= value, or end() when every key is smaller than value.
            const auto boundary = int64map.lower_bound(value);
            for (auto cursor = int64map.begin(); cursor != boundary; ++cursor) {
                matched_lists.emplace_back(cursor->second);
            }

            // The boundary key itself only qualifies for <= and only when it equals value.
            if (comparator == LESS_THAN_EQUALS && boundary != int64map.end() && boundary->first == value) {
                matched_lists.emplace_back(boundary->second);
            }
            break;
        }

        default:
            // Remaining comparators select nothing here.
            break;
    }

    std::vector<id_list_t*> resolved_lists;
    ids_t::to_expanded_id_lists(matched_lists, resolved_lists, expanded_id_lists);

    for (auto* resolved_list : resolved_lists) {
        id_list_iterators.emplace_back(resolved_list->new_iterator());
    }
}
void num_tree_t::approx_search_count(NUM_COMPARATOR comparator, int64_t value, uint32_t& ids_len) {
if (int64map.empty()) {
return;
@ -429,40 +351,3 @@ num_tree_t::~num_tree_t() {
ids_t::destroy_list(kv.second);
}
}
// Merges the ids of all given iterators into one freshly allocated array.
//
// A min-heap keyed on each iterator's current id repeatedly yields the smallest pending id.
// An id equal to the one emitted just before it is dropped, so consecutive duplicates
// collapse into a single entry. Every iterator in `id_list_iterators` is advanced until it
// is no longer valid.
//
// `result_ids` is pointed at a buffer allocated with new[]; whatever it pointed to before is
// overwritten without being freed. `result_ids_len` receives the number of ids in the buffer.
// `comparator` is part of the interface but is not consulted here.
void num_tree_t::merge_id_list_iterators(std::vector<id_list_t::iterator_t>& id_list_iterators,
                                         const NUM_COMPARATOR &comparator,
                                         uint32_t*& result_ids,
                                         uint32_t& result_ids_len) const {
    using iterator_ptr = id_list_t::iterator_t*;

    // "Greater than" ordering turns std::priority_queue into a min-heap on the current id.
    auto has_larger_id = [](const id_list_t::iterator_t* lhs, const id_list_t::iterator_t* rhs) {
        return lhs->id() > rhs->id();
    };
    std::priority_queue<iterator_ptr, std::vector<iterator_ptr>, decltype(has_larger_id)> pending(has_larger_id);

    for (auto& candidate : id_list_iterators) {
        if (candidate.valid()) {
            pending.push(&candidate);
        }
    }

    std::vector<uint32_t> merged_ids;
    while (!pending.empty()) {
        iterator_ptr smallest = pending.top();
        pending.pop();

        // Skip an id that repeats the previously emitted one.
        const uint32_t current_id = smallest->id();
        if (merged_ids.empty() || merged_ids.back() != current_id) {
            merged_ids.push_back(current_id);
        }

        smallest->next();
        if (smallest->valid()) {
            pending.push(smallest);
        }
    }

    result_ids_len = merged_ids.size();
    result_ids = new uint32_t[merged_ids.size()];
    std::copy(merged_ids.begin(), merged_ids.end(), result_ids);
}