Add ArrayUtils::skip_index_to_id.

This commit is contained in:
Harpreet Sangar 2023-04-25 09:39:51 +05:30
parent b67655c45b
commit 33be7e6c68
5 changed files with 37 additions and 14 deletions

View File

@ -16,4 +16,10 @@ public:
static size_t exclude_scalar(const uint32_t *src, const size_t lenSrc, const uint32_t *filter, const size_t lenFilter,
uint32_t **out);
/// Performs binary search, starting from curr_index, to find the index of id in array. If id is found, curr_index is
/// set to its index; otherwise curr_index is set to the index of the next number bigger than id in the array.
/// \return Whether or not id was found in array.
static bool skip_index_to_id(uint32_t& curr_index, uint32_t const* const array, const uint32_t& array_len,
const uint32_t& id);
};

View File

@ -172,7 +172,7 @@ public:
/// Collects n doc ids while advancing the iterator. The ids present in excluded_result_ids are ignored. The
/// iterator may become invalid during this operation.
void get_n_ids(const uint32_t &n,
size_t& excluded_result_index,
uint32_t& excluded_result_index,
uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size,
std::vector<uint32_t> &results);

View File

@ -149,4 +149,29 @@ size_t ArrayUtils::exclude_scalar(const uint32_t *A, const size_t lenA,
delete[] results;
return res_index;
}
/// Binary-searches the ascending-sorted `array` for `id`, considering only positions in [curr_index, array_len).
///
/// \param curr_index In/out cursor. On entry, the first position to consider. On exit it is the position of `id`
///                   when found; otherwise the position of the first element greater than `id`, or `array_len`
///                   when every remaining element is smaller. It is left untouched when there is nothing to
///                   search (null/empty array or cursor already at/after `array_len`).
/// \param array      Ids sorted in ascending order.
/// \param array_len  Number of elements in `array`.
/// \param id         Value to look for.
/// \return Whether or not `id` was found in `array` at or after the incoming `curr_index`.
bool ArrayUtils::skip_index_to_id(uint32_t& curr_index, uint32_t const* const array, const uint32_t& array_len,
                                  const uint32_t& id) {
    // Callers keep invoking this with the cursor left by the previous call, which legitimately reaches array_len
    // once the array is exhausted. Reading array[curr_index] in that state would be out of bounds.
    if (array == nullptr || curr_index >= array_len) {
        return false;
    }

    // Fast path: the cursor is already at or past the position where id would be.
    if (id <= array[curr_index]) {
        return id == array[curr_index];
    }

    // array[curr_index] < id, so the answer lies in the half-open range [curr_index + 1, array_len).
    // A half-open range guarantees `mid < array_len`, so array[mid] is always a valid read.
    uint32_t start = curr_index + 1;
    uint32_t end = array_len;
    while (start < end) {
        const uint32_t mid = start + (end - start) / 2;
        if (array[mid] < id) {
            start = mid + 1;
        } else {
            end = mid;
        }
    }

    // `start` is now the first position whose element is >= id, or array_len if there is none.
    curr_index = start;
    return start < array_len && array[start] == id;
}

View File

@ -959,7 +959,7 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
}
if (is_filter_result_initialized) {
while (filter_result.docs[result_index] < id && ++result_index < filter_result.count);
ArrayUtils::skip_index_to_id(result_index, filter_result.docs, filter_result.count, id);
if (result_index >= filter_result.count) {
is_valid = false;
@ -1348,7 +1348,7 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n, std::vector<uint32_t
}
void filter_result_iterator_t::get_n_ids(const uint32_t& n,
size_t& excluded_result_index,
uint32_t & excluded_result_index,
uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size,
std::vector<uint32_t>& results) {
if (excluded_result_ids == nullptr || excluded_result_ids_size == 0 ||
@ -1360,11 +1360,7 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n,
for (uint32_t count = 0; count < n && result_index < filter_result.count;) {
auto id = filter_result.docs[result_index++];
while (excluded_result_index < excluded_result_ids_size && excluded_result_ids[excluded_result_index] < id) {
excluded_result_index++;
}
if (excluded_result_index >= excluded_result_ids_size || excluded_result_ids[excluded_result_index] != id) {
if (!ArrayUtils::skip_index_to_id(excluded_result_index, excluded_result_ids, excluded_result_ids_size, id)) {
results.push_back(id);
count++;
}
@ -1375,11 +1371,7 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n,
}
for (uint32_t count = 0; count < n && is_valid;) {
while (excluded_result_index < excluded_result_ids_size && excluded_result_ids[excluded_result_index] < seq_id) {
excluded_result_index++;
}
if (excluded_result_index >= excluded_result_ids_size || excluded_result_ids[excluded_result_index] != seq_id) {
if (!ArrayUtils::skip_index_to_id(excluded_result_index, excluded_result_ids, excluded_result_ids_size, seq_id)) {
results.push_back(seq_id);
count++;
}

View File

@ -4966,7 +4966,7 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root,
const auto parent_search_begin = search_begin_us;
const auto parent_search_stop_ms = search_stop_us;
auto parent_search_cutoff = search_cutoff;
size_t excluded_result_index = 0;
uint32_t excluded_result_index = 0;
for(size_t thread_id = 0; thread_id < num_threads && filter_result_iterator.is_valid; thread_id++) {
std::vector<uint32_t> batch_result_ids;