From e13adbb0c28ef49d46c700efa2957aec5a90b7c8 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar
Date: Fri, 17 Nov 2023 14:38:58 +0530
Subject: [PATCH] Add `CollectionJoinTest, FilterByReferenceAlias`.

---
 test/collection_join_test.cpp | 192 ++++++++++++++++++++++++++++++++++
 1 file changed, 192 insertions(+)

diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp
index e3e15354..577bd9d7 100644
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@@ -4056,3 +4056,195 @@ TEST_F(CollectionJoinTest, SortByReference) {
     ASSERT_FALSE(search_op.ok());
     ASSERT_EQ("Multiple references found to sort by on `Customers.product_price`.", search_op.error());
 }
+
+// Verifies that a reference filter (`$Collection(...)` in `filter_by`) gives the same
+// hit whether the searched collection and/or the referenced collection is addressed by
+// its real name or through a symlink created with `upsert_symlink`.
+TEST_F(CollectionJoinTest, FilterByReferenceAlias) {
+    auto schema_json =
+            R"({
+                "name": "Products",
+                "fields": [
+                    {"name": "product_id", "type": "string"},
+                    {"name": "product_name", "type": "string", "infix": true},
+                    {"name": "product_description", "type": "string"},
+                    {"name": "embedding", "type":"float[]", "embed":{"from": ["product_description"], "model_config": {"model_name": "ts/e5-small"}}},
+                    {"name": "rating", "type": "int32"}
+                ]
+            })"_json;
+    // NOTE(review): `rating` is declared int32 but supplied as a string below; the add
+    // is asserted to succeed, so this presumably relies on value coercion - confirm.
+    std::vector<nlohmann::json> documents = {
+            R"({
+                "product_id": "product_a",
+                "product_name": "shampoo",
+                "product_description": "Our new moisturizing shampoo is perfect for those with dry or damaged hair.",
+                "rating": "2"
+            })"_json,
+            R"({
+                "product_id": "product_b",
+                "product_name": "soap",
+                "product_description": "Introducing our all-natural, organic soap bar made with essential oils and botanical ingredients.",
+                "rating": "4"
+            })"_json
+    };
+
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    schema_json =
+            R"({
+                "name": "Customers",
+                "fields": [
+                    {"name": "customer_id", "type": "string"},
+                    {"name": "customer_name", "type": "string"},
+                    {"name": "product_price", "type": "float"},
+                    {"name": "product_id", "type": "string", "reference": "Products.product_id"}
+                ]
+            })"_json;
+    documents = {
+            R"({
+                "customer_id": "customer_a",
+                "customer_name": "Joe",
+                "product_price": 143,
+                "product_id": "product_a"
+            })"_json,
+            R"({
+                "customer_id": "customer_a",
+                "customer_name": "Joe",
+                "product_price": 73.5,
+                "product_id": "product_b"
+            })"_json,
+            R"({
+                "customer_id": "customer_b",
+                "customer_name": "Dan",
+                "product_price": 75,
+                "product_id": "product_a"
+            })"_json,
+            R"({
+                "customer_id": "customer_b",
+                "customer_name": "Dan",
+                "product_price": 140,
+                "product_id": "product_b"
+            })"_json
+    };
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        if (!add_op.ok()) {
+            LOG(INFO) << add_op.error();
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    auto symlink_op = collectionManager.upsert_symlink("Products_alias", "Products");
+    ASSERT_TRUE(symlink_op.ok());
+
+    // NOTE(review): unlike the Products alias, this one is registered with a leading
+    // `$` on both the alias and the target name - confirm that is intended.
+    symlink_op = collectionManager.upsert_symlink("$Customers_alias", "$Customers");
+    ASSERT_TRUE(symlink_op.ok());
+
+    // Case 1: searched collection via its alias, referenced collection by its real name.
+    std::map<std::string, std::string> req_params = {
+            {"collection", "Products_alias"},
+            {"q", "*"},
+            {"query_by", "product_name"},
+            {"filter_by", "$Customers(customer_id:=customer_a && product_price:<100)"},
+    };
+    nlohmann::json embedded_params;
+    std::string json_res;
+    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::system_clock::now().time_since_epoch()).count();
+
+    auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    // Check the search itself before parsing its response.
+    ASSERT_TRUE(search_op.ok());
+
+    nlohmann::json res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(1, res_obj["found"].get<size_t>());
+    ASSERT_EQ(1, res_obj["hits"].size());
+    // No fields are mentioned in `include_fields`, should include all fields of Products and Customers by default.
+    ASSERT_EQ(7, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_name"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_description"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("embedding"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("rating"));
+    // Default strategy of reference includes is nest. No alias was provided, collection name becomes the field name.
+    ASSERT_EQ(5, res_obj["hits"][0]["document"]["Customers"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("customer_id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("customer_name"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("product_id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("product_price"));
+
+    // Case 2: searched collection by its real name, referenced collection via its alias.
+    req_params = {
+            {"collection", "Products"},
+            {"q", "*"},
+            {"query_by", "product_name"},
+            {"filter_by", "$Customers_alias(customer_id:=customer_a && product_price:<100)"},
+    };
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_TRUE(search_op.ok());
+
+    res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(1, res_obj["found"].get<size_t>());
+    ASSERT_EQ(1, res_obj["hits"].size());
+    // No fields are mentioned in `include_fields`, should include all fields of Products and Customers by default.
+    ASSERT_EQ(7, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_name"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_description"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("embedding"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("rating"));
+    // Default strategy of reference includes is nest. The nested key is asserted to be the real collection name, not the alias used in the filter.
+    ASSERT_EQ(5, res_obj["hits"][0]["document"]["Customers"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("customer_id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("customer_name"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("product_id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("product_price"));
+
+    // Case 3: both the searched and the referenced collection are addressed via aliases.
+    req_params = {
+            {"collection", "Products_alias"},
+            {"q", "*"},
+            {"query_by", "product_name"},
+            {"filter_by", "$Customers_alias(customer_id:=customer_a && product_price:<100)"},
+    };
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_TRUE(search_op.ok());
+
+    res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(1, res_obj["found"].get<size_t>());
+    ASSERT_EQ(1, res_obj["hits"].size());
+    // No fields are mentioned in `include_fields`, should include all fields of Products and Customers by default.
+    ASSERT_EQ(7, res_obj["hits"][0]["document"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_name"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_description"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("embedding"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("rating"));
+    // Default strategy of reference includes is nest. The nested key is asserted to be the real collection name, not the alias used in the filter.
+    ASSERT_EQ(5, res_obj["hits"][0]["document"]["Customers"].size());
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("customer_id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("customer_name"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("product_id"));
+    ASSERT_EQ(1, res_obj["hits"][0]["document"]["Customers"].count("product_price"));
+}
\ No newline at end of file