Expose filter ids from iterator where possible.

This commit is contained in:
Harpreet Sangar 2023-04-18 16:53:24 +05:30
parent ac4cb54436
commit d44e2e4c7a
3 changed files with 46 additions and 18 deletions

View File

@ -127,6 +127,14 @@ private:
void doc_matching_string_filter(bool field_is_array);
public:
uint32_t* get_ids() {
return filter_result.docs;
}
uint32_t get_length() {
return filter_result.count;
}
uint32_t seq_id = 0;
/// Collection name -> references
std::map<std::string, reference_filter_result_t> reference;
@ -180,4 +188,6 @@ public:
/// Performs AND with the contents of A and allocates a new array of results.
/// \return size of the results array
uint32_t and_scalar(const uint32_t* A, const uint32_t& lenA, uint32_t*& results);
/// \return true when the matching ids can be read directly from `filter_result`; the definition
/// accepts only a non-operator filter node that is a reference filter, an `id` filter, or a
/// filter on an indexed integer, float or bool field.
bool can_get_ids();
};

View File

@ -1001,17 +1001,10 @@ uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) {
return 0;
}
if (!filter_node->isOperator) {
const filter a_filter = filter_node->filter_exp;
field f = index->search_schema.at(a_filter.field_name);
if (!a_filter.referenced_collection_name.empty() || a_filter.field_name == "id" ||
(index->field_is_indexed(a_filter.field_name) && (f.is_integer() || f.is_float() || f.is_bool()))) {
filter_array = new uint32_t[filter_result.count];
std::copy(filter_result.docs, filter_result.docs + filter_result.count, filter_array);
return filter_result.count;
}
if (can_get_ids()) {
filter_array = new uint32_t[filter_result.count];
std::copy(filter_result.docs, filter_result.docs + filter_result.count, filter_array);
return filter_result.count;
}
std::vector<uint32_t> filter_ids;
@ -1031,6 +1024,10 @@ uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t&
return 0;
}
if (can_get_ids()) {
return ArrayUtils::and_scalar(A, lenA, filter_result.docs, filter_result.count, &results);
}
std::vector<uint32_t> filter_ids;
for (uint32_t i = 0; i < lenA; i++) {
auto result = valid(A[i]);
@ -1121,3 +1118,17 @@ filter_result_iterator_t &filter_result_iterator_t::operator=(filter_result_iter
return *this;
}
bool filter_result_iterator_t::can_get_ids() {
if (!filter_node->isOperator) {
const filter a_filter = filter_node->filter_exp;
field f = index->search_schema.at(a_filter.field_name);
if (!a_filter.referenced_collection_name.empty() || a_filter.field_name == "id" ||
(index->field_is_indexed(a_filter.field_name) && (f.is_integer() || f.is_float() || f.is_bool()))) {
return true;
}
}
return false;
}

View File

@ -4951,14 +4951,23 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root,
const auto parent_search_stop_ms = search_stop_us;
auto parent_search_cutoff = search_cutoff;
for(size_t thread_id = 0; thread_id < num_threads && filter_result_iterator.valid(); thread_id++) {
for(size_t thread_id = 0; thread_id < num_threads &&
(filter_result_iterator.can_get_ids() ?
filter_index < filter_result_iterator.get_length() :
filter_result_iterator.valid()); thread_id++) {
std::vector<uint32_t> batch_result_ids;
batch_result_ids.reserve(window_size);
do {
batch_result_ids.push_back(filter_result_iterator.seq_id);
filter_result_iterator.next();
} while (batch_result_ids.size() < window_size && filter_result_iterator.valid());
if (filter_result_iterator.can_get_ids()) {
while (batch_result_ids.size() < window_size && filter_index < filter_result_iterator.get_length()) {
batch_result_ids.push_back(filter_result_iterator.get_ids()[filter_index++]);
}
} else {
do {
batch_result_ids.push_back(filter_result_iterator.seq_id);
filter_result_iterator.next();
} while (batch_result_ids.size() < window_size && filter_result_iterator.valid());
}
num_queued++;
@ -5019,8 +5028,6 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root,
parent_search_cutoff = parent_search_cutoff || search_cutoff;
cv_process.notify_one();
});
filter_index += batch_result_ids.size();
}
std::unique_lock<std::mutex> lock_process(m_process);