mirror of
https://github.com/typesense/typesense.git
synced 2025-05-17 20:22:32 +08:00
Handle !=
in filter_result_iterator_t
.
This commit is contained in:
parent
d796391464
commit
a9ca96a63e
@ -120,6 +120,9 @@ private:
|
||||
/// Performs OR on the subtrees of operator.
|
||||
void or_filter_iterators();
|
||||
|
||||
/// Advance all the token iterators that are at seq_id.
|
||||
void advance_string_filter_token_iterators();
|
||||
|
||||
/// Finds the next match for a filter on string field.
|
||||
void doc_matching_string_filter(bool field_is_array);
|
||||
|
||||
|
@ -126,6 +126,8 @@ public:
|
||||
|
||||
uint32_t first_id();
|
||||
|
||||
uint32_t last_id();
|
||||
|
||||
block_t* block_of(uint32_t id);
|
||||
|
||||
bool contains(uint32_t id);
|
||||
|
@ -265,6 +265,18 @@ void filter_result_iterator_t::or_filter_iterators() {
|
||||
is_valid = false;
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::advance_string_filter_token_iterators() {
|
||||
for (uint32_t i = 0; i < posting_list_iterators.size(); i++) {
|
||||
auto& filter_value_tokens = posting_list_iterators[i];
|
||||
|
||||
if (filter_value_tokens[0].valid() && filter_value_tokens[0].id() == seq_id) {
|
||||
for (auto& iter: filter_value_tokens) {
|
||||
iter.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::doc_matching_string_filter(bool field_is_array) {
|
||||
// If none of the filter value iterators are valid, mark this node as invalid.
|
||||
bool one_is_valid = false;
|
||||
@ -384,18 +396,38 @@ void filter_result_iterator_t::next() {
|
||||
field f = index->search_schema.at(a_filter.field_name);
|
||||
|
||||
if (f.is_string()) {
|
||||
// Advance all the filter values that are at doc. Then find the next doc.
|
||||
for (uint32_t i = 0; i < posting_list_iterators.size(); i++) {
|
||||
auto& filter_value_tokens = posting_list_iterators[i];
|
||||
|
||||
if (filter_value_tokens[0].valid() && filter_value_tokens[0].id() == seq_id) {
|
||||
for (auto& iter: filter_value_tokens) {
|
||||
iter.next();
|
||||
}
|
||||
if (filter_node->filter_exp.apply_not_equals) {
|
||||
if (++seq_id < result_index) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t previous_match;
|
||||
do {
|
||||
previous_match = seq_id;
|
||||
advance_string_filter_token_iterators();
|
||||
doc_matching_string_filter(f.is_array());
|
||||
} while (is_valid && previous_match + 1 == seq_id);
|
||||
|
||||
if (!is_valid) {
|
||||
// We've reached the end of the index, no possible matches pending.
|
||||
if (previous_match >= index->seq_ids->last_id()) {
|
||||
return;
|
||||
}
|
||||
|
||||
is_valid = true;
|
||||
result_index = index->seq_ids->last_id() + 1;
|
||||
seq_id = previous_match + 1;
|
||||
return;
|
||||
}
|
||||
|
||||
result_index = seq_id;
|
||||
seq_id = previous_match + 1;
|
||||
return;
|
||||
}
|
||||
|
||||
advance_string_filter_token_iterators();
|
||||
doc_matching_string_filter(f.is_array());
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -509,6 +541,47 @@ void filter_result_iterator_t::init() {
|
||||
}
|
||||
|
||||
doc_matching_string_filter(f.is_array());
|
||||
|
||||
if (filter_node->filter_exp.apply_not_equals) {
|
||||
// filter didn't match any id. So by applying not equals, every id in the index is a match.
|
||||
if (!is_valid) {
|
||||
is_valid = true;
|
||||
seq_id = 0;
|
||||
result_index = index->seq_ids->last_id() + 1;
|
||||
return;
|
||||
}
|
||||
|
||||
// [0, seq_id) are a match for not equals.
|
||||
if (seq_id > 0) {
|
||||
result_index = seq_id;
|
||||
seq_id = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// Keep ignoring the consecutive matches.
|
||||
uint32_t previous_match;
|
||||
do {
|
||||
previous_match = seq_id;
|
||||
advance_string_filter_token_iterators();
|
||||
doc_matching_string_filter(f.is_array());
|
||||
} while (is_valid && previous_match + 1 == seq_id);
|
||||
|
||||
if (!is_valid) {
|
||||
// filter matched all the ids in the index. So for not equals, there's no match.
|
||||
if (previous_match >= index->seq_ids->last_id()) {
|
||||
return;
|
||||
}
|
||||
|
||||
is_valid = true;
|
||||
result_index = index->seq_ids->last_id() + 1;
|
||||
seq_id = previous_match + 1;
|
||||
return;
|
||||
}
|
||||
|
||||
result_index = seq_id;
|
||||
seq_id = previous_match + 1;
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -543,6 +616,10 @@ bool filter_result_iterator_t::valid() {
|
||||
field f = index->search_schema.at(a_filter.field_name);
|
||||
|
||||
if (f.is_string()) {
|
||||
if (filter_node->filter_exp.apply_not_equals) {
|
||||
return seq_id < result_index;
|
||||
}
|
||||
|
||||
bool one_is_valid = false;
|
||||
for (auto& filter_value_tokens: posting_list_iterators) {
|
||||
posting_list_t::intersect(filter_value_tokens, one_is_valid);
|
||||
@ -618,6 +695,41 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
|
||||
field f = index->search_schema.at(a_filter.field_name);
|
||||
|
||||
if (f.is_string()) {
|
||||
if (filter_node->filter_exp.apply_not_equals) {
|
||||
if (id < seq_id) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (id < result_index) {
|
||||
seq_id = id;
|
||||
return;
|
||||
}
|
||||
|
||||
seq_id = result_index;
|
||||
uint32_t previous_match;
|
||||
do {
|
||||
previous_match = seq_id;
|
||||
advance_string_filter_token_iterators();
|
||||
doc_matching_string_filter(f.is_array());
|
||||
} while (is_valid && previous_match + 1 == seq_id && seq_id >= id);
|
||||
|
||||
if (!is_valid) {
|
||||
// filter matched all the ids in the index. So for not equals, there's no match.
|
||||
if (previous_match >= index->seq_ids->last_id()) {
|
||||
return;
|
||||
}
|
||||
|
||||
is_valid = true;
|
||||
seq_id = previous_match + 1;
|
||||
result_index = index->seq_ids->last_id() + 1;
|
||||
return;
|
||||
}
|
||||
|
||||
result_index = seq_id;
|
||||
seq_id = previous_match + 1;
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip all the token iterators and find a new match.
|
||||
for (auto& filter_value_tokens : posting_list_iterators) {
|
||||
for (auto& token: filter_value_tokens) {
|
||||
@ -670,23 +782,6 @@ int filter_result_iterator_t::valid(uint32_t id) {
|
||||
}
|
||||
}
|
||||
|
||||
if (filter_node->filter_exp.apply_not_equals) {
|
||||
// Even when iterator becomes invalid, we keep it marked as valid since we are evaluating not equals.
|
||||
if (!valid()) {
|
||||
is_valid = true;
|
||||
return 1;
|
||||
}
|
||||
|
||||
skip_to(id);
|
||||
|
||||
if (!is_valid) {
|
||||
is_valid = true;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return seq_id != id ? 1 : 0;
|
||||
}
|
||||
|
||||
skip_to(id);
|
||||
return is_valid ? (seq_id == id ? 1 : 0) : -1;
|
||||
}
|
||||
|
@ -338,6 +338,14 @@ uint32_t id_list_t::first_id() {
|
||||
return root_block.ids.at(0);
|
||||
}
|
||||
|
||||
uint32_t id_list_t::last_id() {
|
||||
if(id_block_map.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return id_block_map.rbegin()->first;
|
||||
}
|
||||
|
||||
id_list_t::block_t* id_list_t::block_of(uint32_t id) {
|
||||
const auto it = id_block_map.lower_bound(id);
|
||||
if(it == id_block_map.end()) {
|
||||
|
@ -162,23 +162,23 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
}
|
||||
ASSERT_FALSE(iter_exact_match_multi_test.valid());
|
||||
|
||||
// delete filter_tree_root;
|
||||
// filter_tree_root = nullptr;
|
||||
// filter_op = filter::parse_filter_query("tags:!= gold", coll->get_schema(), store, doc_id_prefix,
|
||||
// filter_tree_root);
|
||||
// ASSERT_TRUE(filter_op.ok());
|
||||
//
|
||||
// auto iter_not_equals_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
// ASSERT_TRUE(iter_not_equals_test.init_status().ok());
|
||||
//
|
||||
// std::vector<uint32_t> expected = {1, 3};
|
||||
// for (auto const& i : expected) {
|
||||
// ASSERT_TRUE(iter_not_equals_test.valid());
|
||||
// ASSERT_EQ(i, iter_not_equals_test.seq_id);
|
||||
// iter_not_equals_test.next();
|
||||
// }
|
||||
//
|
||||
// ASSERT_FALSE(iter_not_equals_test.valid());
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
filter_op = filter::parse_filter_query("tags:!= gold", coll->get_schema(), store, doc_id_prefix,
|
||||
filter_tree_root);
|
||||
ASSERT_TRUE(filter_op.ok());
|
||||
|
||||
auto iter_not_equals_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
|
||||
ASSERT_TRUE(iter_not_equals_test.init_status().ok());
|
||||
|
||||
expected = {1, 3};
|
||||
for (auto const& i : expected) {
|
||||
ASSERT_TRUE(iter_not_equals_test.valid());
|
||||
ASSERT_EQ(i, iter_not_equals_test.seq_id);
|
||||
iter_not_equals_test.next();
|
||||
}
|
||||
|
||||
ASSERT_FALSE(iter_not_equals_test.valid());
|
||||
|
||||
delete filter_tree_root;
|
||||
filter_tree_root = nullptr;
|
||||
@ -287,8 +287,8 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
filter_tree_root);
|
||||
ASSERT_TRUE(iter_validate_ids_not_equals_filter_test.init_status().ok());
|
||||
|
||||
validate_ids = {0, 1, 2, 3, 4, 5, 6, 7, 100};
|
||||
expected = {0, 1, 0, 1, 0, 1, 1, 1, 1};
|
||||
validate_ids = {0, 1, 2, 3, 4, 5, 6};
|
||||
expected = {0, 1, 0, 1, 0, 1, -1};
|
||||
for (uint32_t i = 0; i < validate_ids.size(); i++) {
|
||||
ASSERT_EQ(expected[i], iter_validate_ids_not_equals_filter_test.valid(validate_ids[i]));
|
||||
}
|
||||
@ -422,7 +422,7 @@ TEST_F(FilterTest, FilterTreeIterator) {
|
||||
for (uint32_t i = 0; i < and_result_length; i++) {
|
||||
ASSERT_EQ(expected[i], and_result[i]);
|
||||
}
|
||||
ASSERT_FALSE(iter_and_test.valid());
|
||||
ASSERT_FALSE(iter_and_scalar_test.valid());
|
||||
|
||||
delete and_result;
|
||||
delete filter_tree_root;
|
||||
|
Loading…
x
Reference in New Issue
Block a user