Add tests for filter by prefix value. (#1600)

* Add tests for filter by prefix value.

* Use just `*` instead of `.*` for prefix match.
This commit is contained in:
Harpreet Sangar 2024-03-05 16:18:48 +05:30 committed by GitHub
parent daad8df50b
commit bcf0c00d8e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 60 additions and 9 deletions

View File

@ -1145,10 +1145,9 @@ void filter_result_iterator_t::init() {
art_tree* t = index->search_index.at(a_filter.field_name);
for (std::string filter_value : a_filter.values) {
auto is_prefix_match = filter_value.size() > 2 && filter_value[filter_value.size() - 2] == '.' &&
filter_value[filter_value.size() - 1] == '*';
auto is_prefix_match = filter_value.size() > 1 && filter_value[filter_value.size() - 1] == '*';
if (is_prefix_match) {
filter_value.erase(filter_value.size() - 2);
filter_value.erase(filter_value.size() - 1);
}
std::vector<void*> raw_posting_lists;
@ -1231,6 +1230,9 @@ void filter_result_iterator_t::init() {
// Searching for `Chris P*` will return `Chris Parnell` and `Chris Pine`.
for (const auto& searched_filter_value: searched_filters) {
raw_posting_lists.clear();
approx_filter_value_match = UINT32_MAX;
for (const auto& leaf: searched_filter_value) {
if (leaf == nullptr) {
continue;
@ -1259,7 +1261,6 @@ void filter_result_iterator_t::init() {
// Multiple filter values get OR.
approx_filter_ids_length += approx_filter_value_match;
raw_posting_lists.clear();
}
continue;
}

View File

@ -2394,7 +2394,7 @@ TEST_F(CollectionFilteringTest, PrefixFilterOnTextFields) {
ASSERT_EQ(id, result_id);
}
results = coll_mul_fields->search("*", {}, "cast: Ch.*", {}, {}, {0},
results = coll_mul_fields->search("*", {}, "cast: Ch*", {}, {}, {0},
10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(3, results["hits"].size());
@ -2407,7 +2407,7 @@ TEST_F(CollectionFilteringTest, PrefixFilterOnTextFields) {
ASSERT_EQ(id, result_id);
}
results = coll_mul_fields->search("*", {}, "cast: M.*", {}, {}, {0},
results = coll_mul_fields->search("*", {}, "cast: M*", {}, {}, {0},
10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(3, results["hits"].size());
@ -2420,7 +2420,7 @@ TEST_F(CollectionFilteringTest, PrefixFilterOnTextFields) {
ASSERT_EQ(id, result_id);
}
results = coll_mul_fields->search("*", {}, "cast: Chris P.*", {}, {}, {0},
results = coll_mul_fields->search("*", {}, "cast: Chris P*", {}, {}, {0},
10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(2, results["hits"].size());
@ -2433,7 +2433,7 @@ TEST_F(CollectionFilteringTest, PrefixFilterOnTextFields) {
ASSERT_EQ(id, result_id);
}
results = coll_mul_fields->search("*", {}, "cast: [Martin, Chris P.*]", {}, {}, {0},
results = coll_mul_fields->search("*", {}, "cast: [Martin, Chris P*]", {}, {}, {0},
10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(3, results["hits"].size());
@ -2446,7 +2446,7 @@ TEST_F(CollectionFilteringTest, PrefixFilterOnTextFields) {
ASSERT_EQ(id, result_id);
}
results = coll_mul_fields->search("*", {}, "cast: [M.*, Chris P.*]", {}, {}, {0},
results = coll_mul_fields->search("*", {}, "cast: [M*, Chris P*]", {}, {}, {0},
10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(5, results["hits"].size());

View File

@ -764,6 +764,56 @@ TEST_F(FilterTest, FilterTreeIterator) {
ASSERT_EQ(-1, iter_boolean_test_2.is_valid(10));
ASSERT_EQ(filter_result_iterator_t::invalid, iter_boolean_test_2.validity);
delete filter_tree_root;
doc = R"({
"name": "James rock",
"age": 20,
"years": [],
"rating": 4.51,
"tags": ["gallium", "Gadolinium"]
})"_json;
add_op = coll->add(doc.dump());
ASSERT_TRUE(add_op.ok());
search_stop_us = UINT64_MAX; // `Index::fuzzy_search_fields` checks for timeout.
filter_tree_root = nullptr;
filter_op = filter::parse_filter_query("tags: g*", coll->get_schema(), store, doc_id_prefix,
filter_tree_root);
ASSERT_TRUE(filter_op.ok());
auto iter_string_prefix_value_test = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_string_prefix_value_test.init_status().ok());
ASSERT_FALSE(iter_string_prefix_value_test._get_is_filter_result_initialized());
ASSERT_EQ(3, iter_string_prefix_value_test.approx_filter_ids_length); // document 0 and 2 have been deleted.
expected = {4, 8};
for (auto const& i : expected) {
ASSERT_EQ(filter_result_iterator_t::valid, iter_string_prefix_value_test.validity);
ASSERT_EQ(i, iter_string_prefix_value_test.seq_id);
iter_string_prefix_value_test.next();
}
ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_prefix_value_test.validity);
delete filter_tree_root;
filter_tree_root = nullptr;
filter_op = filter::parse_filter_query("tags: != g*", coll->get_schema(), store, doc_id_prefix,
filter_tree_root);
ASSERT_TRUE(filter_op.ok());
auto iter_string_prefix_value_test_2 = filter_result_iterator_t(coll->get_name(), coll->_get_index(), filter_tree_root);
ASSERT_TRUE(iter_string_prefix_value_test_2.init_status().ok());
ASSERT_FALSE(iter_string_prefix_value_test_2._get_is_filter_result_initialized());
ASSERT_EQ(3, iter_string_prefix_value_test_2.approx_filter_ids_length); // document 0 and 2 have been deleted.
expected = {1, 3, 5, 6, 7};
for (auto const& i : expected) {
ASSERT_EQ(filter_result_iterator_t::valid, iter_string_prefix_value_test_2.validity);
ASSERT_EQ(i, iter_string_prefix_value_test_2.seq_id);
iter_string_prefix_value_test_2.next();
}
ASSERT_EQ(filter_result_iterator_t::invalid, iter_string_prefix_value_test_2.validity);
delete filter_tree_root;
}
TEST_F(FilterTest, FilterTreeIteratorTimeout) {