Add filter_result_iterator_t::get_n_ids.

Use `is_valid` instead of `valid()`.
Handle special `_all_` field name in filtering logic.
This commit is contained in:
Harpreet Sangar 2023-04-20 13:19:42 +05:30
parent 034f20f4e4
commit 9896541874
7 changed files with 227 additions and 175 deletions

View File

@ -99,6 +99,7 @@ private:
/// Stores the result of the filters that cannot be iterated.
filter_result_t filter_result;
bool is_filter_result_initialized = false;
/// Initialized in case of filter on string field.
/// Sample filter values: ["foo bar", "baz"]. Each filter value is split into tokens. We get posting list iterator
@ -108,9 +109,6 @@ private:
std::vector<std::vector<posting_list_t::iterator_t>> posting_list_iterators;
std::vector<posting_list_t*> expanded_plists;
/// Set to false when this iterator or its subtree becomes invalid.
bool is_valid = true;
/// Initializes the state of iterator node after its creation.
void init();
@ -126,18 +124,18 @@ private:
/// Finds the next match for a filter on string field.
void doc_matching_string_filter(bool field_is_array);
/// Returns true when doc and reference hold valid values. Used in conjunction with next() and skip_to(id).
[[nodiscard]] bool valid();
public:
/// Returns the `docs` pointer stored in `filter_result`. The pointer is handed out as-is; no copy is made.
uint32_t* get_ids() {
return filter_result.docs;
}
/// Returns the `count` stored in `filter_result`.
uint32_t get_length() {
return filter_result.count;
}
uint32_t seq_id = 0;
/// Collection name -> references
std::map<std::string, reference_filter_result_t> reference;
/// Set to false when this iterator or its subtree becomes invalid.
bool is_valid = true;
/// Initialization status of the iterator.
Option<bool> status = Option(true);
/// Holds the upper-bound of the number of seq ids this iterator would match.
@ -156,9 +154,6 @@ public:
/// Returns the status of the initialization of iterator tree.
Option<bool> init_status();
/// Returns true when doc and reference hold valid values. Used in conjunction with next() and skip_to(id).
[[nodiscard]] bool valid();
/// Returns a tri-state:
/// 0: id is not valid
/// 1: id is valid
@ -171,6 +166,9 @@ public:
/// operation.
void next();
/// Collects n doc ids while advancing the iterator. The iterator may become invalid during this operation.
void get_n_ids(const uint32_t& n, std::vector<uint32_t>& results);
/// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during
/// this operation.
void skip_to(uint32_t id);
@ -188,6 +186,4 @@ public:
/// Performs AND with the contents of A and allocates a new array of results.
/// \return size of the results array
uint32_t and_scalar(const uint32_t* A, const uint32_t& lenA, uint32_t*& results);
bool can_get_ids();
};

View File

@ -563,6 +563,7 @@ public:
static const int DROP_TOKENS_THRESHOLD = 1;
// "_all_" is a special field that maps to all the ids in the index.
static constexpr const char* SEQ_IDS_FIELD = "_all_";
static constexpr const char* SEQ_IDS_FILTER = "_all_: 1";
Index() = delete;

View File

@ -991,7 +991,7 @@ const uint32_t* get_allowed_doc_ids(art_tree *t, const std::string& prev_token,
std::vector<uint32_t> prev_leaf_ids;
posting_t::merge({prev_leaf->values}, prev_leaf_ids);
if(filter_result_iterator.valid()) {
if(filter_result_iterator.is_valid) {
prev_token_doc_ids_len = filter_result_iterator.and_scalar(prev_leaf_ids.data(), prev_leaf_ids.size(),
prev_token_doc_ids);
} else {
@ -1692,6 +1692,7 @@ int art_fuzzy_search_i(art_tree *t, const unsigned char *term, const int term_le
// documents that contain the previous token and/or filter ids
size_t allowed_doc_ids_len = 0;
const uint32_t* allowed_doc_ids = get_allowed_doc_ids(t, prev_token, filter_result_iterator, allowed_doc_ids_len);
filter_result_iterator.reset();
for(auto node: nodes) {
art_topk_iter(node, token_order, max_words, exact_leaf,

View File

@ -283,6 +283,9 @@ Option<bool> toFilter(const std::string expression,
}
}
return Option<bool>(true);
} else if (field_name == Index::SEQ_IDS_FIELD) {
filter_exp = {field_name, {}, {}};
return Option<bool>(true);
}
auto field_it = search_schema.find(field_name);

View File

@ -271,8 +271,12 @@ void filter_result_iterator_t::advance_string_filter_token_iterators() {
for (uint32_t i = 0; i < posting_list_iterators.size(); i++) {
auto& filter_value_tokens = posting_list_iterators[i];
if (filter_value_tokens[0].valid() && filter_value_tokens[0].id() == seq_id) {
for (auto& iter: filter_value_tokens) {
if (!filter_value_tokens[0].valid() || filter_value_tokens[0].id() != seq_id) {
continue;
}
for (auto& iter: filter_value_tokens) {
if (iter.valid()) {
iter.next();
}
}
@ -362,10 +366,7 @@ void filter_result_iterator_t::next() {
return;
}
const filter a_filter = filter_node->filter_exp;
bool is_referenced_filter = !a_filter.referenced_collection_name.empty();
if (is_referenced_filter) {
if (is_filter_result_initialized) {
if (++result_index >= filter_result.count) {
is_valid = false;
return;
@ -380,15 +381,7 @@ void filter_result_iterator_t::next() {
return;
}
if (a_filter.field_name == "id") {
if (++result_index >= filter_result.count) {
is_valid = false;
return;
}
seq_id = filter_result.docs[result_index];
return;
}
const filter a_filter = filter_node->filter_exp;
if (!index->field_is_indexed(a_filter.field_name)) {
is_valid = false;
@ -397,16 +390,7 @@ void filter_result_iterator_t::next() {
field f = index->search_schema.at(a_filter.field_name);
if (f.is_integer() || f.is_float() || f.is_bool()) {
result_index++;
if (result_index >= filter_result.count) {
is_valid = false;
return;
}
seq_id = filter_result.docs[result_index];
return;
} else if (f.is_string()) {
if (f.is_string()) {
if (filter_node->filter_exp.apply_not_equals) {
if (++seq_id < result_index) {
return;
@ -443,6 +427,41 @@ void filter_result_iterator_t::next() {
}
}
/// Computes `all_ids` minus the ids that `num_tree` reports as EQUALS to `value`.
///
/// \param num_tree        tree that is searched with the EQUALS comparator.
/// \param value           value whose matching ids are removed from `all_ids`.
/// \param all_ids         id array to subtract from; released with `delete[]` before returning.
/// \param all_ids_length  number of entries in `all_ids`.
/// \param result_ids      receives the array produced by `ArrayUtils::exclude_scalar`.
/// \param result_ids_len  receives the value returned by `ArrayUtils::exclude_scalar`.
///
/// NOTE(review): whatever `result_ids` pointed at before the call is not freed here; confirm that
/// callers invoking this once per filter value do not leak the previous buffer.
void numeric_not_equals_filter(num_tree_t* const num_tree,
                               const int64_t value,
                               uint32_t*&& all_ids,
                               uint32_t&& all_ids_length,
                               uint32_t*& result_ids,
                               size_t& result_ids_len) {
    // Collect the ids whose value equals `value`; these are the ones to drop.
    uint32_t* matching_ids = nullptr;
    size_t matching_ids_len = 0;
    num_tree->search(EQUALS, value, &matching_ids, matching_ids_len);

    result_ids_len = ArrayUtils::exclude_scalar(all_ids, all_ids_length,
                                                matching_ids, matching_ids_len,
                                                &result_ids);

    // Both input buffers are released by this function.
    delete[] all_ids;
    delete[] matching_ids;
}
/// Replaces `result_ids` with `all_ids` minus the current `result_ids`.
///
/// \param all_ids         id array to subtract from; released with `delete[]` before returning.
/// \param all_ids_length  number of entries in `all_ids`.
/// \param result_ids      in: ids to exclude (released with `delete[]`);
///                        out: array produced by `ArrayUtils::exclude_scalar`.
/// \param result_ids_len  in: number of entries in `result_ids`;
///                        out: value returned by `ArrayUtils::exclude_scalar`.
void apply_not_equals(uint32_t*&& all_ids,
                      uint32_t&& all_ids_length,
                      uint32_t*& result_ids,
                      uint32_t& result_ids_len) {
    uint32_t* complement_ids = nullptr;
    const size_t complement_len = ArrayUtils::exclude_scalar(all_ids, all_ids_length,
                                                             result_ids, result_ids_len,
                                                             &complement_ids);

    // The inputs are no longer needed once the complement has been produced.
    delete[] all_ids;
    delete[] result_ids;

    // Hand the complement back through the in/out parameters.
    result_ids = complement_ids;
    result_ids_len = complement_len;
}
void filter_result_iterator_t::init() {
if (filter_node == nullptr) {
return;
@ -487,6 +506,11 @@ void filter_result_iterator_t::init() {
}
seq_id = filter_result.docs[result_index];
for (auto const& item: filter_result.reference_filter_results) {
reference[item.first] = item.second[result_index];
}
is_filter_result_initialized = true;
return;
}
@ -507,7 +531,22 @@ void filter_result_iterator_t::init() {
filter_result.count = result_ids.size();
filter_result.docs = new uint32_t[result_ids.size()];
std::copy(result_ids.begin(), result_ids.end(), filter_result.docs);
seq_id = filter_result.docs[result_index];
is_filter_result_initialized = true;
return;
} else if (a_filter.field_name == Index::SEQ_IDS_FIELD) {
if (index->seq_ids->num_ids() == 0) {
is_valid = false;
return;
}
filter_result.count = index->seq_ids->num_ids();
filter_result.docs = index->seq_ids->uncompress();
seq_id = filter_result.docs[result_index];
is_filter_result_initialized = true;
return;
}
if (!index->field_is_indexed(a_filter.field_name)) {
@ -520,28 +559,40 @@ void filter_result_iterator_t::init() {
if (f.is_integer()) {
auto num_tree = index->numerical_index.at(a_filter.field_name);
// TODO: Handle not equals
for (size_t fi = 0; fi < a_filter.values.size(); fi++) {
const std::string& filter_value = a_filter.values[fi];
int64_t value = (int64_t)std::stol(filter_value);
size_t result_size = filter_result.count;
if (a_filter.comparators[fi] == RANGE_INCLUSIVE && fi+1 < a_filter.values.size()) {
const std::string& next_filter_value = a_filter.values[fi + 1];
auto const range_end_value = (int64_t)std::stol(next_filter_value);
num_tree->range_inclusive_search(value, range_end_value, &filter_result.docs,
reinterpret_cast<size_t &>(filter_result.count));
num_tree->range_inclusive_search(value, range_end_value, &filter_result.docs, result_size);
fi++;
} else if (a_filter.comparators[fi] == NOT_EQUALS) {
numeric_not_equals_filter(num_tree, value,
index->seq_ids->uncompress(), index->seq_ids->num_ids(),
filter_result.docs, result_size);
} else {
num_tree->search(a_filter.comparators[fi], value,
&filter_result.docs, reinterpret_cast<size_t &>(filter_result.count));
num_tree->search(a_filter.comparators[fi], value, &filter_result.docs, result_size);
}
filter_result.count = result_size;
}
if (a_filter.apply_not_equals) {
apply_not_equals(index->seq_ids->uncompress(), index->seq_ids->num_ids(),
filter_result.docs, filter_result.count);
}
if (filter_result.count == 0) {
is_valid = false;
return;
}
seq_id = filter_result.docs[result_index];
is_filter_result_initialized = true;
return;
} else if (f.is_float()) {
auto num_tree = index->numerical_index.at(a_filter.field_name);
@ -550,22 +601,36 @@ void filter_result_iterator_t::init() {
float value = (float)std::atof(filter_value.c_str());
int64_t float_int64 = Index::float_to_int64_t(value);
size_t result_size = filter_result.count;
if (a_filter.comparators[fi] == RANGE_INCLUSIVE && fi+1 < a_filter.values.size()) {
const std::string& next_filter_value = a_filter.values[fi+1];
int64_t range_end_value = Index::float_to_int64_t((float) std::atof(next_filter_value.c_str()));
num_tree->range_inclusive_search(float_int64, range_end_value, &filter_result.docs,
reinterpret_cast<size_t &>(filter_result.count));
num_tree->range_inclusive_search(float_int64, range_end_value, &filter_result.docs, result_size);
fi++;
} else if (a_filter.comparators[fi] == NOT_EQUALS) {
numeric_not_equals_filter(num_tree, float_int64,
index->seq_ids->uncompress(), index->seq_ids->num_ids(),
filter_result.docs, result_size);
} else {
num_tree->search(a_filter.comparators[fi], float_int64,
&filter_result.docs, reinterpret_cast<size_t &>(filter_result.count));
num_tree->search(a_filter.comparators[fi], float_int64, &filter_result.docs, result_size);
}
filter_result.count = result_size;
}
if (a_filter.apply_not_equals) {
apply_not_equals(index->seq_ids->uncompress(), index->seq_ids->num_ids(),
filter_result.docs, filter_result.count);
}
if (filter_result.count == 0) {
is_valid = false;
return;
}
seq_id = filter_result.docs[result_index];
is_filter_result_initialized = true;
return;
} else if (f.is_bool()) {
auto num_tree = index->numerical_index.at(a_filter.field_name);
@ -573,16 +638,32 @@ void filter_result_iterator_t::init() {
for (const std::string& filter_value : a_filter.values) {
int64_t bool_int64 = (filter_value == "1") ? 1 : 0;
num_tree->search(a_filter.comparators[value_index], bool_int64,
&filter_result.docs, reinterpret_cast<size_t &>(filter_result.count));
size_t result_size = filter_result.count;
if (a_filter.comparators[value_index] == NOT_EQUALS) {
numeric_not_equals_filter(num_tree, bool_int64,
index->seq_ids->uncompress(), index->seq_ids->num_ids(),
filter_result.docs, result_size);
} else {
num_tree->search(a_filter.comparators[value_index], bool_int64, &filter_result.docs, result_size);
}
filter_result.count = result_size;
value_index++;
}
if (a_filter.apply_not_equals) {
apply_not_equals(index->seq_ids->uncompress(), index->seq_ids->num_ids(),
filter_result.docs, filter_result.count);
}
if (filter_result.count == 0) {
is_valid = false;
return;
}
seq_id = filter_result.docs[result_index];
is_filter_result_initialized = true;
return;
} else if (f.is_string()) {
art_tree* t = index->search_index.at(a_filter.field_name);
@ -684,13 +765,13 @@ bool filter_result_iterator_t::valid() {
}
}
const filter a_filter = filter_node->filter_exp;
if (!a_filter.referenced_collection_name.empty() || a_filter.field_name == "id") {
if (is_filter_result_initialized) {
is_valid = result_index < filter_result.count;
return is_valid;
}
const filter a_filter = filter_node->filter_exp;
if (!index->field_is_indexed(a_filter.field_name)) {
is_valid = false;
return is_valid;
@ -698,10 +779,7 @@ bool filter_result_iterator_t::valid() {
field f = index->search_schema.at(a_filter.field_name);
if (f.is_integer() || f.is_float() || f.is_bool()) {
is_valid = result_index < filter_result.count;
return is_valid;
} else if (f.is_string()) {
if (f.is_string()) {
if (filter_node->filter_exp.apply_not_equals) {
return seq_id < result_index;
}
@ -741,10 +819,7 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
return;
}
const filter a_filter = filter_node->filter_exp;
bool is_referenced_filter = !a_filter.referenced_collection_name.empty();
if (is_referenced_filter) {
if (is_filter_result_initialized) {
while (filter_result.docs[result_index] < id && ++result_index < filter_result.count);
if (result_index >= filter_result.count) {
@ -761,17 +836,7 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
return;
}
if (a_filter.field_name == "id") {
while (filter_result.docs[result_index] < id && ++result_index < filter_result.count);
if (result_index >= filter_result.count) {
is_valid = false;
return;
}
seq_id = filter_result.docs[result_index];
return;
}
const filter a_filter = filter_node->filter_exp;
if (!index->field_is_indexed(a_filter.field_name)) {
is_valid = false;
@ -780,17 +845,7 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
field f = index->search_schema.at(a_filter.field_name);
if (f.is_integer() || f.is_float() || f.is_bool()) {
while(result_index < filter_result.count && filter_result.docs[result_index] < id) {
result_index++;
}
if (result_index >= filter_result.count) {
is_valid = false;
}
return;
} else if (f.is_string()) {
if (f.is_string()) {
if (filter_node->filter_exp.apply_not_equals) {
if (id < seq_id) {
return;
@ -897,7 +952,7 @@ bool filter_result_iterator_t::contains_atleast_one(const void *obj) {
compact_posting_list_t* list = COMPACT_POSTING_PTR(obj);
size_t i = 0;
while(i < list->length && valid()) {
while(i < list->length && is_valid) {
size_t num_existing_offsets = list->id_offsets[i];
size_t existing_id = list->id_offsets[i + num_existing_offsets + 1];
@ -916,7 +971,7 @@ bool filter_result_iterator_t::contains_atleast_one(const void *obj) {
auto list = (posting_list_t*)(obj);
posting_list_t::iterator_t it = list->new_iterator();
while(it.valid() && valid()) {
while(it.valid() && is_valid) {
uint32_t id = it.id();
if(id == seq_id) {
@ -943,6 +998,7 @@ void filter_result_iterator_t::reset() {
// Reset the subtrees then apply operators to arrive at the first valid doc.
left_it->reset();
right_it->reset();
is_valid = true;
if (filter_node->filter_operator == AND) {
and_filter_iterators();
@ -953,10 +1009,7 @@ void filter_result_iterator_t::reset() {
return;
}
const filter a_filter = filter_node->filter_exp;
bool is_referenced_filter = !a_filter.referenced_collection_name.empty();
if (is_referenced_filter || a_filter.field_name == "id") {
if (is_filter_result_initialized) {
if (filter_result.count == 0) {
is_valid = false;
return;
@ -964,27 +1017,25 @@ void filter_result_iterator_t::reset() {
result_index = 0;
seq_id = filter_result.docs[result_index];
reference.clear();
for (auto const& item: filter_result.reference_filter_results) {
reference[item.first] = item.second[result_index];
}
is_valid = true;
return;
}
const filter a_filter = filter_node->filter_exp;
if (!index->field_is_indexed(a_filter.field_name)) {
return;
}
field f = index->search_schema.at(a_filter.field_name);
if (f.is_integer() || f.is_float() || f.is_bool()) {
if (filter_result.count == 0) {
is_valid = false;
return;
}
result_index = 0;
seq_id = filter_result.docs[result_index];
is_valid = true;
return;
} else if (f.is_string()) {
if (f.is_string()) {
posting_list_iterators.clear();
for(auto expanded_plist: expanded_plists) {
delete expanded_plist;
@ -997,11 +1048,11 @@ void filter_result_iterator_t::reset() {
}
uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) {
if (!valid()) {
if (!is_valid) {
return 0;
}
if (can_get_ids()) {
if (is_filter_result_initialized) {
filter_array = new uint32_t[filter_result.count];
std::copy(filter_result.docs, filter_result.docs + filter_result.count, filter_array);
return filter_result.count;
@ -1011,7 +1062,7 @@ uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) {
do {
filter_ids.push_back(seq_id);
next();
} while (valid());
} while (is_valid);
filter_array = new uint32_t[filter_ids.size()];
std::copy(filter_ids.begin(), filter_ids.end(), filter_array);
@ -1020,11 +1071,11 @@ uint32_t filter_result_iterator_t::to_filter_id_array(uint32_t*& filter_array) {
}
uint32_t filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& lenA, uint32_t*& results) {
if (!valid()) {
if (!is_valid) {
return 0;
}
if (can_get_ids()) {
if (is_filter_result_initialized) {
return ArrayUtils::and_scalar(A, lenA, filter_result.docs, filter_result.count, &results);
}
@ -1115,20 +1166,23 @@ filter_result_iterator_t &filter_result_iterator_t::operator=(filter_result_iter
seq_id = obj.seq_id;
reference = std::move(obj.reference);
status = std::move(obj.status);
is_filter_result_initialized = obj.is_filter_result_initialized;
return *this;
}
bool filter_result_iterator_t::can_get_ids() {
if (!filter_node->isOperator) {
const filter a_filter = filter_node->filter_exp;
field f = index->search_schema.at(a_filter.field_name);
if (!a_filter.referenced_collection_name.empty() || a_filter.field_name == "id" ||
(index->field_is_indexed(a_filter.field_name) && (f.is_integer() || f.is_float() || f.is_bool()))) {
return true;
void filter_result_iterator_t::get_n_ids(const uint32_t& n, std::vector<uint32_t>& results) {
if (is_filter_result_initialized) {
for (uint32_t count = 0; count < n && result_index < filter_result.count; count++) {
results.push_back(filter_result.docs[result_index++]);
}
is_valid = result_index < filter_result.count;
return;
}
return false;
for (uint32_t count = 0; count < n && is_valid; count++) {
results.push_back(seq_id);
next();
}
}

View File

@ -2741,7 +2741,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
return filter_init_op;
}
if (filter_tree_root != nullptr && !filter_result_iterator.valid()) {
if (filter_tree_root != nullptr && !filter_result_iterator.is_valid) {
return Option(true);
}
@ -2806,7 +2806,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
// for phrase query, parser will set field_query_tokens to "*", need to handle that
if (is_wildcard_query && field_query_tokens[0].q_phrases.empty()) {
const uint8_t field_id = (uint8_t)(FIELD_LIMIT_NUM - 0);
bool no_filters_provided = (filter_tree_root == nullptr && !filter_result_iterator.valid());
bool no_filters_provided = (filter_tree_root == nullptr && !filter_result_iterator.is_valid);
if(no_filters_provided && facets.empty() && curated_ids.empty() && vector_query.field_name.empty() &&
sort_fields_std.size() == 1 && sort_fields_std[0].name == sort_field_const::seq_id &&
@ -2855,11 +2855,9 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
store, doc_id_prefix, filter_tree_root);
filter_result_iterator = filter_result_iterator_t(collection_name, this, filter_tree_root);
approx_filter_ids_length = filter_result_iterator.is_valid;
}
// TODO: Curate ids at last
// curate_filtered_ids(curated_ids, excluded_result_ids,
// excluded_result_ids_size, filter_result.docs, filter_result.count, curated_ids_sorted);
collate_included_ids({}, included_ids_map, curated_topster, searched_queries);
if (!vector_query.field_name.empty()) {
@ -2875,8 +2873,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
uint32_t filter_id_count = 0;
while (!no_filters_provided &&
filter_id_count < vector_query.flat_search_cutoff &&
filter_result_iterator.valid()) {
filter_id_count < vector_query.flat_search_cutoff && filter_result_iterator.is_valid) {
auto seq_id = filter_result_iterator.seq_id;
std::vector<float> values;
@ -2904,7 +2901,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
}
if(no_filters_provided ||
(filter_id_count >= vector_query.flat_search_cutoff && filter_result_iterator.valid())) {
(filter_id_count >= vector_query.flat_search_cutoff && filter_result_iterator.is_valid)) {
dist_labels.clear();
VectorFilterFunctor filterFunctor(&filter_result_iterator);
@ -2973,7 +2970,19 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
all_result_ids, all_result_ids_len,
filter_result_iterator, approx_filter_ids_length, concurrency,
sort_order, field_values, geopoint_indices);
filter_result_iterator.reset();
}
// filter tree was initialized to have all sequence ids in this flow.
if (no_filters_provided) {
delete filter_tree_root;
filter_tree_root = nullptr;
}
uint32_t _all_result_ids_len = all_result_ids_len;
curate_filtered_ids(curated_ids, excluded_result_ids,
excluded_result_ids_size, all_result_ids, _all_result_ids_len, curated_ids_sorted);
all_result_ids_len = _all_result_ids_len;
} else {
// Non-wildcard
// In multi-field searches, a record can be matched across different fields, so we use this for aggregation
@ -3414,7 +3423,7 @@ void Index::process_curated_ids(const std::vector<std::pair<uint32_t, uint32_t>>
// if `filter_curated_hits` is enabled, we will remove curated hits that don't match filter condition
std::set<uint32_t> included_ids_set;
if(filter_result_iterator.valid() && filter_curated_hits) {
if(filter_result_iterator.is_valid && filter_curated_hits) {
for (const auto &included_id: included_ids_vec) {
auto result = filter_result_iterator.valid(included_id);
@ -3683,6 +3692,7 @@ void Index::fuzzy_search_fields(const std::vector<search_field_t>& the_fields,
art_fuzzy_search_i(search_index.at(the_field.name), (const unsigned char *) token.c_str(), token_len,
costs[token_index], costs[token_index], max_candidates, token_order, prefix_search,
false, "", filter_result_iterator, field_leaves, unique_tokens);
filter_result_iterator.reset();
if(field_leaves.empty()) {
// look at the next field
@ -4649,7 +4659,7 @@ void Index::do_infix_search(const size_t num_search_fields, const std::vector<se
raw_infix_ids_length = infix_ids.size();
}
if(filter_result_iterator.valid()) {
if(filter_result_iterator.is_valid) {
uint32_t *filtered_raw_infix_ids = nullptr;
raw_infix_ids_length = filter_result_iterator.and_scalar(raw_infix_ids, raw_infix_ids_length,
@ -4971,29 +4981,16 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root,
std::condition_variable cv_process;
size_t num_queued = 0;
size_t filter_index = 0;
const auto parent_search_begin = search_begin_us;
const auto parent_search_stop_ms = search_stop_us;
auto parent_search_cutoff = search_cutoff;
for(size_t thread_id = 0; thread_id < num_threads &&
(filter_result_iterator.can_get_ids() ?
filter_index < filter_result_iterator.get_length() :
filter_result_iterator.valid()); thread_id++) {
for(size_t thread_id = 0; thread_id < num_threads && filter_result_iterator.is_valid; thread_id++) {
std::vector<uint32_t> batch_result_ids;
batch_result_ids.reserve(window_size);
if (filter_result_iterator.can_get_ids()) {
while (batch_result_ids.size() < window_size && filter_index < filter_result_iterator.get_length()) {
batch_result_ids.push_back(filter_result_iterator.get_ids()[filter_index++]);
}
} else {
do {
batch_result_ids.push_back(filter_result_iterator.seq_id);
filter_result_iterator.next();
} while (batch_result_ids.size() < window_size && filter_result_iterator.valid());
}
filter_result_iterator.get_n_ids(window_size, batch_result_ids);
num_queued++;

View File

@ -65,7 +65,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_null_filter_tree_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_null_filter_tree_test.init_status().ok());
ASSERT_FALSE(iter_null_filter_tree_test.valid());
ASSERT_FALSE(iter_null_filter_tree_test.is_valid);
Option<bool> filter_op = filter::parse_filter_query("name: foo", coll->get_schema(), store, doc_id_prefix,
filter_tree_root);
@ -74,7 +74,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_no_match_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_no_match_test.init_status().ok());
ASSERT_FALSE(iter_no_match_test.valid());
ASSERT_FALSE(iter_no_match_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -85,7 +85,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_no_match_multi_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_no_match_multi_test.init_status().ok());
ASSERT_FALSE(iter_no_match_multi_test.valid());
ASSERT_FALSE(iter_no_match_multi_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -97,11 +97,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
ASSERT_TRUE(iter_contains_test.init_status().ok());
for (uint32_t i = 0; i < 5; i++) {
ASSERT_TRUE(iter_contains_test.valid());
ASSERT_TRUE(iter_contains_test.is_valid);
ASSERT_EQ(i, iter_contains_test.seq_id);
iter_contains_test.next();
}
ASSERT_FALSE(iter_contains_test.valid());
ASSERT_FALSE(iter_contains_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -113,11 +113,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
ASSERT_TRUE(iter_contains_multi_test.init_status().ok());
for (uint32_t i = 0; i < 5; i++) {
ASSERT_TRUE(iter_contains_multi_test.valid());
ASSERT_TRUE(iter_contains_multi_test.is_valid);
ASSERT_EQ(i, iter_contains_multi_test.seq_id);
iter_contains_multi_test.next();
}
ASSERT_FALSE(iter_contains_multi_test.valid());
ASSERT_FALSE(iter_contains_multi_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -129,11 +129,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
ASSERT_TRUE(iter_exact_match_1_test.init_status().ok());
for (uint32_t i = 0; i < 5; i++) {
ASSERT_TRUE(iter_exact_match_1_test.valid());
ASSERT_TRUE(iter_exact_match_1_test.is_valid);
ASSERT_EQ(i, iter_exact_match_1_test.seq_id);
iter_exact_match_1_test.next();
}
ASSERT_FALSE(iter_exact_match_1_test.valid());
ASSERT_FALSE(iter_exact_match_1_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -143,7 +143,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_exact_match_2_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_exact_match_2_test.init_status().ok());
ASSERT_FALSE(iter_exact_match_2_test.valid());
ASSERT_FALSE(iter_exact_match_2_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -156,11 +156,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
std::vector<int> expected = {0, 2, 3, 4};
for (auto const& i : expected) {
ASSERT_TRUE(iter_exact_match_multi_test.valid());
ASSERT_TRUE(iter_exact_match_multi_test.is_valid);
ASSERT_EQ(i, iter_exact_match_multi_test.seq_id);
iter_exact_match_multi_test.next();
}
ASSERT_FALSE(iter_exact_match_multi_test.valid());
ASSERT_FALSE(iter_exact_match_multi_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -173,12 +173,12 @@ TEST_F(FilterTest, FilterTreeIterator) {
expected = {1, 3};
for (auto const& i : expected) {
ASSERT_TRUE(iter_not_equals_test.valid());
ASSERT_TRUE(iter_not_equals_test.is_valid);
ASSERT_EQ(i, iter_not_equals_test.seq_id);
iter_not_equals_test.next();
}
ASSERT_FALSE(iter_not_equals_test.valid());
ASSERT_FALSE(iter_not_equals_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -189,13 +189,13 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_skip_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_skip_test.init_status().ok());
ASSERT_TRUE(iter_skip_test.valid());
ASSERT_TRUE(iter_skip_test.is_valid);
iter_skip_test.skip_to(3);
ASSERT_TRUE(iter_skip_test.valid());
ASSERT_TRUE(iter_skip_test.is_valid);
ASSERT_EQ(4, iter_skip_test.seq_id);
iter_skip_test.next();
ASSERT_FALSE(iter_skip_test.valid());
ASSERT_FALSE(iter_skip_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -206,11 +206,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_and_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_and_test.init_status().ok());
ASSERT_TRUE(iter_and_test.valid());
ASSERT_TRUE(iter_and_test.is_valid);
ASSERT_EQ(1, iter_and_test.seq_id);
iter_and_test.next();
ASSERT_FALSE(iter_and_test.valid());
ASSERT_FALSE(iter_and_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -234,12 +234,12 @@ TEST_F(FilterTest, FilterTreeIterator) {
expected = {2, 4, 5};
for (auto const& i : expected) {
ASSERT_TRUE(iter_or_test.valid());
ASSERT_TRUE(iter_or_test.is_valid);
ASSERT_EQ(i, iter_or_test.seq_id);
iter_or_test.next();
}
ASSERT_FALSE(iter_or_test.valid());
ASSERT_FALSE(iter_or_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -250,17 +250,17 @@ TEST_F(FilterTest, FilterTreeIterator) {
auto iter_skip_complex_filter_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_skip_complex_filter_test.init_status().ok());
ASSERT_TRUE(iter_skip_complex_filter_test.valid());
ASSERT_TRUE(iter_skip_complex_filter_test.is_valid);
iter_skip_complex_filter_test.skip_to(4);
expected = {4, 5};
for (auto const& i : expected) {
ASSERT_TRUE(iter_skip_complex_filter_test.valid());
ASSERT_TRUE(iter_skip_complex_filter_test.is_valid);
ASSERT_EQ(i, iter_skip_complex_filter_test.seq_id);
iter_skip_complex_filter_test.next();
}
ASSERT_FALSE(iter_skip_complex_filter_test.valid());
ASSERT_FALSE(iter_skip_complex_filter_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -358,20 +358,20 @@ TEST_F(FilterTest, FilterTreeIterator) {
expected = {0, 2, 3, 4};
for (auto const& i : expected) {
ASSERT_TRUE(iter_reset_test.valid());
ASSERT_TRUE(iter_reset_test.is_valid);
ASSERT_EQ(i, iter_reset_test.seq_id);
iter_reset_test.next();
}
ASSERT_FALSE(iter_reset_test.valid());
ASSERT_FALSE(iter_reset_test.is_valid);
iter_reset_test.reset();
for (auto const& i : expected) {
ASSERT_TRUE(iter_reset_test.valid());
ASSERT_TRUE(iter_reset_test.is_valid);
ASSERT_EQ(i, iter_reset_test.seq_id);
iter_reset_test.next();
}
ASSERT_FALSE(iter_reset_test.valid());
ASSERT_FALSE(iter_reset_test.is_valid);
auto iter_move_assignment_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
@ -380,11 +380,11 @@ TEST_F(FilterTest, FilterTreeIterator) {
expected = {0, 2, 3, 4};
for (auto const& i : expected) {
ASSERT_TRUE(iter_move_assignment_test.valid());
ASSERT_TRUE(iter_move_assignment_test.is_valid);
ASSERT_EQ(i, iter_move_assignment_test.seq_id);
iter_move_assignment_test.next();
}
ASSERT_FALSE(iter_move_assignment_test.valid());
ASSERT_FALSE(iter_move_assignment_test.is_valid);
delete filter_tree_root;
filter_tree_root = nullptr;
@ -405,7 +405,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
for (uint32_t i = 0; i < filter_ids_length; i++) {
ASSERT_EQ(expected[i], filter_ids[i]);
}
ASSERT_FALSE(iter_to_array_test.valid());
ASSERT_FALSE(iter_to_array_test.is_valid);
delete filter_ids;
@ -422,7 +422,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
for (uint32_t i = 0; i < and_result_length; i++) {
ASSERT_EQ(expected[i], and_result[i]);
}
ASSERT_FALSE(iter_and_scalar_test.valid());
ASSERT_FALSE(iter_and_scalar_test.is_valid);
delete and_result;
delete filter_tree_root;