Handle != in filter_result_iterator_t.

This commit is contained in:
Harpreet Sangar 2023-04-12 18:47:55 +05:30
parent d796391464
commit a9ca96a63e
5 changed files with 153 additions and 45 deletions

View File

@ -120,6 +120,9 @@ private:
/// Performs OR on the subtrees of operator.
void or_filter_iterators();
/// Advance all the token iterators that are at seq_id.
void advance_string_filter_token_iterators();
/// Finds the next match for a filter on string field.
void doc_matching_string_filter(bool field_is_array);

View File

@ -126,6 +126,8 @@ public:
uint32_t first_id();
uint32_t last_id();
block_t* block_of(uint32_t id);
bool contains(uint32_t id);

View File

@ -265,6 +265,18 @@ void filter_result_iterator_t::or_filter_iterators() {
is_valid = false;
}
void filter_result_iterator_t::advance_string_filter_token_iterators() {
for (uint32_t i = 0; i < posting_list_iterators.size(); i++) {
auto& filter_value_tokens = posting_list_iterators[i];
if (filter_value_tokens[0].valid() && filter_value_tokens[0].id() == seq_id) {
for (auto& iter: filter_value_tokens) {
iter.next();
}
}
}
}
void filter_result_iterator_t::doc_matching_string_filter(bool field_is_array) {
// If none of the filter value iterators are valid, mark this node as invalid.
bool one_is_valid = false;
@ -384,18 +396,38 @@ void filter_result_iterator_t::next() {
field f = index->search_schema.at(a_filter.field_name);
if (f.is_string()) {
// Advance all the filter values that are at doc. Then find the next doc.
for (uint32_t i = 0; i < posting_list_iterators.size(); i++) {
auto& filter_value_tokens = posting_list_iterators[i];
if (filter_value_tokens[0].valid() && filter_value_tokens[0].id() == seq_id) {
for (auto& iter: filter_value_tokens) {
iter.next();
}
if (filter_node->filter_exp.apply_not_equals) {
if (++seq_id < result_index) {
return;
}
uint32_t previous_match;
do {
previous_match = seq_id;
advance_string_filter_token_iterators();
doc_matching_string_filter(f.is_array());
} while (is_valid && previous_match + 1 == seq_id);
if (!is_valid) {
// We've reached the end of the index, no possible matches pending.
if (previous_match >= index->seq_ids->last_id()) {
return;
}
is_valid = true;
result_index = index->seq_ids->last_id() + 1;
seq_id = previous_match + 1;
return;
}
result_index = seq_id;
seq_id = previous_match + 1;
return;
}
advance_string_filter_token_iterators();
doc_matching_string_filter(f.is_array());
return;
}
}
@ -509,6 +541,47 @@ void filter_result_iterator_t::init() {
}
doc_matching_string_filter(f.is_array());
if (filter_node->filter_exp.apply_not_equals) {
// filter didn't match any id. So by applying not equals, every id in the index is a match.
if (!is_valid) {
is_valid = true;
seq_id = 0;
result_index = index->seq_ids->last_id() + 1;
return;
}
// [0, seq_id) are a match for not equals.
if (seq_id > 0) {
result_index = seq_id;
seq_id = 0;
return;
}
// Keep ignoring the consecutive matches.
uint32_t previous_match;
do {
previous_match = seq_id;
advance_string_filter_token_iterators();
doc_matching_string_filter(f.is_array());
} while (is_valid && previous_match + 1 == seq_id);
if (!is_valid) {
// filter matched all the ids in the index. So for not equals, there's no match.
if (previous_match >= index->seq_ids->last_id()) {
return;
}
is_valid = true;
result_index = index->seq_ids->last_id() + 1;
seq_id = previous_match + 1;
return;
}
result_index = seq_id;
seq_id = previous_match + 1;
}
return;
}
}
@ -543,6 +616,10 @@ bool filter_result_iterator_t::valid() {
field f = index->search_schema.at(a_filter.field_name);
if (f.is_string()) {
if (filter_node->filter_exp.apply_not_equals) {
return seq_id < result_index;
}
bool one_is_valid = false;
for (auto& filter_value_tokens: posting_list_iterators) {
posting_list_t::intersect(filter_value_tokens, one_is_valid);
@ -618,6 +695,41 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
field f = index->search_schema.at(a_filter.field_name);
if (f.is_string()) {
if (filter_node->filter_exp.apply_not_equals) {
if (id < seq_id) {
return;
}
if (id < result_index) {
seq_id = id;
return;
}
seq_id = result_index;
uint32_t previous_match;
do {
previous_match = seq_id;
advance_string_filter_token_iterators();
doc_matching_string_filter(f.is_array());
} while (is_valid && previous_match + 1 == seq_id && seq_id >= id);
if (!is_valid) {
// filter matched all the ids in the index. So for not equals, there's no match.
if (previous_match >= index->seq_ids->last_id()) {
return;
}
is_valid = true;
seq_id = previous_match + 1;
result_index = index->seq_ids->last_id() + 1;
return;
}
result_index = seq_id;
seq_id = previous_match + 1;
return;
}
// Skip all the token iterators and find a new match.
for (auto& filter_value_tokens : posting_list_iterators) {
for (auto& token: filter_value_tokens) {
@ -670,23 +782,6 @@ int filter_result_iterator_t::valid(uint32_t id) {
}
}
if (filter_node->filter_exp.apply_not_equals) {
// Even when iterator becomes invalid, we keep it marked as valid since we are evaluating not equals.
if (!valid()) {
is_valid = true;
return 1;
}
skip_to(id);
if (!is_valid) {
is_valid = true;
return 1;
}
return seq_id != id ? 1 : 0;
}
skip_to(id);
return is_valid ? (seq_id == id ? 1 : 0) : -1;
}

View File

@ -338,6 +338,14 @@ uint32_t id_list_t::first_id() {
return root_block.ids.at(0);
}
uint32_t id_list_t::last_id() {
if(id_block_map.empty()) {
return 0;
}
return id_block_map.rbegin()->first;
}
id_list_t::block_t* id_list_t::block_of(uint32_t id) {
const auto it = id_block_map.lower_bound(id);
if(it == id_block_map.end()) {

View File

@ -162,23 +162,23 @@ TEST_F(FilterTest, FilterTreeIterator) {
}
ASSERT_FALSE(iter_exact_match_multi_test.valid());
// delete filter_tree_root;
// filter_tree_root = nullptr;
// filter_op = filter::parse_filter_query("tags:!= gold", coll->get_schema(), store, doc_id_prefix,
// filter_tree_root);
// ASSERT_TRUE(filter_op.ok());
//
// auto iter_not_equals_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
// ASSERT_TRUE(iter_not_equals_test.init_status().ok());
//
// std::vector<uint32_t> expected = {1, 3};
// for (auto const& i : expected) {
// ASSERT_TRUE(iter_not_equals_test.valid());
// ASSERT_EQ(i, iter_not_equals_test.seq_id);
// iter_not_equals_test.next();
// }
//
// ASSERT_FALSE(iter_not_equals_test.valid());
delete filter_tree_root;
filter_tree_root = nullptr;
filter_op = filter::parse_filter_query("tags:!= gold", coll->get_schema(), store, doc_id_prefix,
filter_tree_root);
ASSERT_TRUE(filter_op.ok());
auto iter_not_equals_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_not_equals_test.init_status().ok());
expected = {1, 3};
for (auto const& i : expected) {
ASSERT_TRUE(iter_not_equals_test.valid());
ASSERT_EQ(i, iter_not_equals_test.seq_id);
iter_not_equals_test.next();
}
ASSERT_FALSE(iter_not_equals_test.valid());
delete filter_tree_root;
filter_tree_root = nullptr;
@ -287,8 +287,8 @@ TEST_F(FilterTest, FilterTreeIterator) {
filter_tree_root);
ASSERT_TRUE(iter_validate_ids_not_equals_filter_test.init_status().ok());
validate_ids = {0, 1, 2, 3, 4, 5, 6, 7, 100};
expected = {0, 1, 0, 1, 0, 1, 1, 1, 1};
validate_ids = {0, 1, 2, 3, 4, 5, 6};
expected = {0, 1, 0, 1, 0, 1, -1};
for (uint32_t i = 0; i < validate_ids.size(); i++) {
ASSERT_EQ(expected[i], iter_validate_ids_not_equals_filter_test.valid(validate_ids[i]));
}
@ -422,7 +422,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
for (uint32_t i = 0; i < and_result_length; i++) {
ASSERT_EQ(expected[i], and_result[i]);
}
ASSERT_FALSE(iter_and_test.valid());
ASSERT_FALSE(iter_and_scalar_test.valid());
delete and_result;
delete filter_tree_root;