Add tests for rearranging filter tree and approx filter match count.

This commit is contained in:
Harpreet Sangar 2023-03-07 18:28:19 +05:30
parent 4ae42c45cb
commit c6386b0c2f
2 changed files with 207 additions and 1 deletions

View File

@ -513,7 +513,7 @@ struct filter {
bool apply_not_equals = false;
// Would store `Foo` in case of a filter expression like `$Foo(bar := baz)`
std::string referenced_collection_name;
std::string referenced_collection_name = "";
static const std::string RANGE_OPERATOR() {
return "..";

View File

@ -1973,3 +1973,209 @@ TEST_F(CollectionSpecificMoreTest, CrossFieldTypoAndPrefixWithWeights) {
"<mark>", "</mark>", {2, 3}).get();
ASSERT_EQ(1, res["hits"].size());
}
// Parses a nested filter expression, verifies the shape of the resulting filter tree,
// runs `do_filtering_with_lock()` on it and then verifies that the children of the
// root AND node have swapped sides while both sub-trees stay intact.
TEST_F(CollectionSpecificMoreTest, RearrangingFilterTree) {
    nlohmann::json schema =
            R"({
"name": "Collection",
"fields": [
{"name": "name", "type": "string"},
{"name": "age", "type": "int32"},
{"name": "years", "type": "int32[]"},
{"name": "rating", "type": "float"}
]
})"_json;

    Collection* coll = collectionManager.create_collection(schema).get();
    ASSERT_TRUE(coll != nullptr);

    std::ifstream infile(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
    // Fail fast when the fixture cannot be opened: otherwise the loop below is skipped
    // silently and the tree assertions run against an empty collection.
    ASSERT_TRUE(infile.is_open());

    std::string json_line;
    while (std::getline(infile, json_line)) {
        auto add_op = coll->add(json_line);
        ASSERT_TRUE(add_op.ok());
    }
    infile.close();

    const std::string doc_id_prefix = std::to_string(coll->get_collection_id()) + "_" + Collection::DOC_ID_PREFIX + "_";
    filter_node_t* filter_tree_root = nullptr;
    Option<bool> filter_op = filter::parse_filter_query("years:>2000 && ((age:<30 && rating:>5) || (age:>50 && rating:<5))",
                                                        coll->get_schema(), store, doc_id_prefix, filter_tree_root);
    ASSERT_TRUE(filter_op.ok());

    // The guard takes over the raw pointer handed back through the out-parameter.
    std::unique_ptr<filter_node_t> filter_tree_root_guard(filter_tree_root);

    // Local checkers. A fatal assertion inside a lambda only returns from the lambda, so
    // every call is wrapped in ASSERT_NO_FATAL_FAILURE to abort the test as well; this
    // also guarantees a parent was validated before one of its children is dereferenced.

    // Checks that `node` is an operator node holding `expected_operator`.
    const auto assert_operator_node = [](const filter_node_t* node, const auto expected_operator) {
        ASSERT_TRUE(node != nullptr);
        ASSERT_TRUE(node->isOperator);
        ASSERT_EQ(node->filter_operator, expected_operator);
    };

    // Checks that `node` is a childless expression node on `field_name`.
    const auto assert_leaf_node = [](const filter_node_t* node, const std::string& field_name) {
        ASSERT_TRUE(node != nullptr);
        ASSERT_FALSE(node->isOperator);
        ASSERT_EQ(node->filter_exp.field_name, field_name);
        ASSERT_TRUE(node->left == nullptr);
        ASSERT_TRUE(node->right == nullptr);
    };

    // Same as `assert_leaf_node`, additionally checking the first comparator and value.
    const auto assert_comparison_leaf = [](const filter_node_t* node, const std::string& field_name,
                                           const auto expected_comparator,
                                           const std::string& expected_value) {
        ASSERT_TRUE(node != nullptr);
        ASSERT_FALSE(node->isOperator);
        ASSERT_EQ(node->filter_exp.field_name, field_name);
        ASSERT_EQ(node->filter_exp.comparators.front(), expected_comparator);
        ASSERT_EQ(node->filter_exp.values.front(), expected_value);
        ASSERT_TRUE(node->left == nullptr);
        ASSERT_TRUE(node->right == nullptr);
    };

    /*
     * Expected tree straight after parsing. A block comment is used on purpose: lines of
     * the drawing end in a backslash, which would splice the following line into a `//`
     * comment. The numbers under the leaves are kept from the original drawing
     * (NOTE(review): presumably approximate match counts per leaf; confirm).
     *
     *                  &&
     *                /    \
     *      years>2000      ||
     *          4         /    \
     *                  &&      &&
     *                /    \  /    \
     *          age<30 rating>5  age>50 rating<5
     *            2       3        1       2
     */
    ASSERT_NO_FATAL_FAILURE(assert_operator_node(filter_tree_root, AND));
    ASSERT_NO_FATAL_FAILURE(assert_leaf_node(filter_tree_root->left, "years"));
    ASSERT_NO_FATAL_FAILURE(assert_operator_node(filter_tree_root->right, OR));

    const filter_node_t* or_node = filter_tree_root->right;
    ASSERT_NO_FATAL_FAILURE(assert_operator_node(or_node->left, AND));
    ASSERT_NO_FATAL_FAILURE(assert_comparison_leaf(or_node->left->left, "age", LESS_THAN, "30"));
    ASSERT_NO_FATAL_FAILURE(assert_comparison_leaf(or_node->left->right, "rating", GREATER_THAN, "5"));
    ASSERT_NO_FATAL_FAILURE(assert_operator_node(or_node->right, AND));
    ASSERT_NO_FATAL_FAILURE(assert_comparison_leaf(or_node->right->left, "age", GREATER_THAN, "50"));
    ASSERT_NO_FATAL_FAILURE(assert_comparison_leaf(or_node->right->right, "rating", LESS_THAN, "5"));

    filter_result_t result;
    // Internally calls rearranging_recursive_filter
    coll->_get_index()->do_filtering_with_lock(filter_tree_root, result);

    /*
     * Expected tree after filtering: at both the root and the OR node the two children
     * have traded places.
     *
     *                     &&
     *                   /    \
     *                 ||      years>2000
     *               /    \
     *             &&      &&
     *           /    \  /    \
     *     age>50 rating<5  age<30 rating>5
     */
    ASSERT_NO_FATAL_FAILURE(assert_operator_node(filter_tree_root, AND));
    ASSERT_NO_FATAL_FAILURE(assert_operator_node(filter_tree_root->left, OR));

    or_node = filter_tree_root->left;
    ASSERT_NO_FATAL_FAILURE(assert_operator_node(or_node->left, AND));
    ASSERT_NO_FATAL_FAILURE(assert_comparison_leaf(or_node->left->left, "age", GREATER_THAN, "50"));
    ASSERT_NO_FATAL_FAILURE(assert_comparison_leaf(or_node->left->right, "rating", LESS_THAN, "5"));
    ASSERT_NO_FATAL_FAILURE(assert_operator_node(or_node->right, AND));
    ASSERT_NO_FATAL_FAILURE(assert_comparison_leaf(or_node->right->left, "age", LESS_THAN, "30"));
    ASSERT_NO_FATAL_FAILURE(assert_comparison_leaf(or_node->right->right, "rating", GREATER_THAN, "5"));

    ASSERT_NO_FATAL_FAILURE(assert_leaf_node(filter_tree_root->right, "years"));
}
// Loads the numeric fixture documents into a fresh collection and checks the count that
// `get_approximate_reference_filter_ids()` writes into its out-parameter for a nested
// AND/OR filter expression.
TEST_F(CollectionSpecificMoreTest, ApproxFilterMatchCount) {
    nlohmann::json schema =
            R"({
"name": "Collection",
"fields": [
{"name": "name", "type": "string"},
{"name": "age", "type": "int32"},
{"name": "years", "type": "int32[]"},
{"name": "rating", "type": "float"}
]
})"_json;

    Collection *coll = collectionManager.create_collection(schema).get();
    ASSERT_TRUE(coll != nullptr);

    std::ifstream infile(std::string(ROOT_DIR) + "test/numeric_array_documents.jsonl");
    // Fail fast when the fixture cannot be opened: otherwise the loop below is skipped
    // silently and the count is computed on an empty collection.
    ASSERT_TRUE(infile.is_open());

    std::string json_line;
    while (std::getline(infile, json_line)) {
        auto add_op = coll->add(json_line);
        ASSERT_TRUE(add_op.ok());
    }
    infile.close();

    // Start from a defined value so the assertion below never reads an indeterminate
    // integer should the call return without writing its out-parameter.
    uint32_t approx_count = 0;
    // NOTE(review): the return value of this call is ignored; if it reports a status,
    // assert on it here. Confirm against the declaration in Collection.
    coll->get_approximate_reference_filter_ids("years:>2000 && ((age:<30 && rating:>5) || (age:>50 && rating:<5))",
                                               approx_count);

    // Unsigned literal keeps the comparison free of signed/unsigned mixing.
    ASSERT_EQ(approx_count, 3u);
}