Add CollectionJoinTest, FilterByReferenceAlias.

This commit is contained in:
Harpreet Sangar 2023-11-17 14:38:58 +05:30
parent 08040ba60c
commit e13adbb0c2

View File

@@ -4056,3 +4056,181 @@ TEST_F(CollectionJoinTest, SortByReference) {
ASSERT_FALSE(search_op.ok());
ASSERT_EQ("Multiple references found to sort by on `Customers.product_price`.", search_op.error());
}
// Verifies that a reference (join) filter gives the same result whether the searched
// collection, the referenced collection, or both are addressed through a symlink alias.
//
// Setup:
//   * `Products`  - two documents, including an `embedding` field declared with `embed.from`.
//   * `Customers` - four documents whose `product_id` field references `Products.product_id`.
//   * Two symlinks are registered via `upsert_symlink`.
//
// Every search applies `customer_id:=customer_a && product_price:<100` to the referenced
// collection; in the fixture data only the (customer_a, product_b, 73.5) row satisfies it,
// so exactly one hit with the nested `Customers` document is expected each time.
TEST_F(CollectionJoinTest, FilterByReferenceAlias) {
    const auto products_schema =
            R"({
                "name": "Products",
                "fields": [
                    {"name": "product_id", "type": "string"},
                    {"name": "product_name", "type": "string", "infix": true},
                    {"name": "product_description", "type": "string"},
                    {"name": "embedding", "type":"float[]", "embed":{"from": ["product_description"], "model_config": {"model_name": "ts/e5-small"}}},
                    {"name": "rating", "type": "int32"}
                ]
            })"_json;
    const std::vector<nlohmann::json> product_docs = {
            R"({
                "product_id": "product_a",
                "product_name": "shampoo",
                "product_description": "Our new moisturizing shampoo is perfect for those with dry or damaged hair.",
                "rating": "2"
            })"_json,
            R"({
                "product_id": "product_b",
                "product_name": "soap",
                "product_description": "Introducing our all-natural, organic soap bar made with essential oils and botanical ingredients.",
                "rating": "4"
            })"_json
    };

    // Must be set before the collection is created because the schema has an `embed` field.
    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");

    auto products_op = collectionManager.create_collection(products_schema);
    ASSERT_TRUE(products_op.ok());
    for (auto const &doc: product_docs) {
        auto add_op = products_op.get()->add(doc.dump());
        // Attach the indexing error to the failure itself so it is visible in the test report.
        ASSERT_TRUE(add_op.ok()) << add_op.error();
    }

    const auto customers_schema =
            R"({
                "name": "Customers",
                "fields": [
                    {"name": "customer_id", "type": "string"},
                    {"name": "customer_name", "type": "string"},
                    {"name": "product_price", "type": "float"},
                    {"name": "product_id", "type": "string", "reference": "Products.product_id"}
                ]
            })"_json;
    const std::vector<nlohmann::json> customer_docs = {
            R"({
                "customer_id": "customer_a",
                "customer_name": "Joe",
                "product_price": 143,
                "product_id": "product_a"
            })"_json,
            R"({
                "customer_id": "customer_a",
                "customer_name": "Joe",
                "product_price": 73.5,
                "product_id": "product_b"
            })"_json,
            R"({
                "customer_id": "customer_b",
                "customer_name": "Dan",
                "product_price": 75,
                "product_id": "product_a"
            })"_json,
            R"({
                "customer_id": "customer_b",
                "customer_name": "Dan",
                "product_price": 140,
                "product_id": "product_b"
            })"_json
    };

    auto customers_op = collectionManager.create_collection(customers_schema);
    ASSERT_TRUE(customers_op.ok());
    for (auto const &doc: customer_docs) {
        auto add_op = customers_op.get()->add(doc.dump());
        ASSERT_TRUE(add_op.ok()) << add_op.error();
    }

    auto symlink_op = collectionManager.upsert_symlink("Products_alias", "Products");
    ASSERT_TRUE(symlink_op.ok());

    // NOTE(review): this alias is registered with a leading `$` on both names, whereas the
    // filters below use `$` as the reference prefix in front of `Customers_alias` - confirm
    // this is the name the symlink lookup actually resolves.
    symlink_op = collectionManager.upsert_symlink("$Customers_alias", "$Customers");
    ASSERT_TRUE(symlink_op.ok());

    // One entry per combination of aliased / non-aliased collection names.
    struct SearchCase {
        std::string collection;
        std::string filter_by;
    };
    const std::vector<SearchCase> search_cases = {
            // Alias on the searched collection only.
            {"Products_alias", "$Customers(customer_id:=customer_a && product_price:<100)"},
            // Alias on the referenced collection only.
            {"Products", "$Customers_alias(customer_id:=customer_a && product_price:<100)"},
            // Alias on both.
            {"Products_alias", "$Customers_alias(customer_id:=customer_a && product_price:<100)"},
    };

    const std::vector<std::string> product_fields = {
            "id", "product_id", "product_name", "product_description", "embedding", "rating"
    };
    const std::vector<std::string> customer_fields = {
            "customer_id", "customer_name", "id", "product_id", "product_price"
    };

    const auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::system_clock::now().time_since_epoch()).count();

    for (const auto &search_case: search_cases) {
        // Identifies the failing combination, since all cases share the same assertion lines.
        SCOPED_TRACE("collection=" + search_case.collection + ", filter_by=" + search_case.filter_by);

        std::map<std::string, std::string> req_params = {
                {"collection", search_case.collection},
                {"q", "*"},
                {"query_by", "product_name"},
                {"filter_by", search_case.filter_by},
        };
        nlohmann::json embedded_params;
        std::string json_res;

        auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
        // Check the search succeeded before parsing: on failure `json_res` holds no valid
        // result and `nlohmann::json::parse` would throw instead of reporting the error.
        ASSERT_TRUE(search_op.ok()) << search_op.error();

        nlohmann::json res_obj = nlohmann::json::parse(json_res);
        ASSERT_EQ(1, res_obj["found"].get<size_t>());
        ASSERT_EQ(1, res_obj["hits"].size());

        // No fields are mentioned in `include_fields`, should include all fields of Products and
        // Customers by default: six Products fields plus the nested `Customers` object.
        auto &document = res_obj["hits"][0]["document"];
        ASSERT_EQ(7, document.size());
        for (const auto &field: product_fields) {
            ASSERT_EQ(1, document.count(field)) << "missing Products field: " << field;
        }

        // Default strategy of reference includes is nest. No include alias was provided, so the
        // real collection name (`Customers`) is the key even when queried through a symlink.
        auto &customer = document["Customers"];
        ASSERT_EQ(5, customer.size());
        for (const auto &field: customer_fields) {
            ASSERT_EQ(1, customer.count(field)) << "missing Customers field: " << field;
        }
    }
}