From 16a1285a9b4ea7035de8ec6f2aa0bca5fb14c4f6 Mon Sep 17 00:00:00 2001 From: kishorenc Date: Fri, 27 Mar 2020 20:06:33 +0530 Subject: [PATCH] Add facet query tests for overrides. --- test/collection_faceting_test.cpp | 1 - test/collection_override_test.cpp | 254 ++++++++++++++++++++++++++++++ test/collection_sorting_test.cpp | 1 - test/collection_test.cpp | 136 ---------------- 4 files changed, 254 insertions(+), 138 deletions(-) create mode 100644 test/collection_override_test.cpp diff --git a/test/collection_faceting_test.cpp b/test/collection_faceting_test.cpp index 843d8f68..796d8e8c 100644 --- a/test/collection_faceting_test.cpp +++ b/test/collection_faceting_test.cpp @@ -29,7 +29,6 @@ protected: } virtual void TearDown() { - collectionManager.drop_collection("collection"); delete store; } }; diff --git a/test/collection_override_test.cpp b/test/collection_override_test.cpp new file mode 100644 index 00000000..a2539da6 --- /dev/null +++ b/test/collection_override_test.cpp @@ -0,0 +1,254 @@ +#include +#include +#include +#include +#include +#include +#include "collection.h" + +class CollectionOverrideTest : public ::testing::Test { +protected: + Store *store; + CollectionManager & collectionManager = CollectionManager::get_instance(); + Collection *coll_mul_fields; + + void setupCollection() { + std::string state_dir_path = "/tmp/typesense_test/collection_override"; + LOG(INFO) << "Truncating and creating: " << state_dir_path; + system(("rm -rf "+state_dir_path+" && mkdir -p "+state_dir_path).c_str()); + + store = new Store(state_dir_path); + collectionManager.init(store, 1, "auth_key", "search_auth_key"); + collectionManager.load(); + + std::ifstream infile(std::string(ROOT_DIR)+"test/multi_field_documents.jsonl"); + std::vector fields = { + field("title", field_types::STRING, false), + field("starring", field_types::STRING, true), + field("cast", field_types::STRING_ARRAY, true), + field("points", field_types::INT32, false) + }; + + coll_mul_fields = collectionManager.get_collection("coll_mul_fields"); + if(coll_mul_fields == nullptr) { + coll_mul_fields = collectionManager.create_collection("coll_mul_fields", fields, "points").get(); + } + + std::string json_line; + + while (std::getline(infile, json_line)) { + coll_mul_fields->add(json_line); + } + + infile.close(); + } + + virtual void SetUp() { + setupCollection(); + } + + virtual void TearDown() { + collectionManager.drop_collection("coll_mul_fields"); + delete store; + } +}; + +TEST_F(CollectionOverrideTest, ExcludeIncludeExactQueryMatch) { + nlohmann::json override_json = { + {"id", "exclude-rule"}, + { + "rule", { + {"query", "of"}, + {"match", override_t::MATCH_EXACT} + } + } + }; + override_json["excludes"] = nlohmann::json::array(); + override_json["excludes"][0] = nlohmann::json::object(); + override_json["excludes"][0]["id"] = "4"; + + override_json["excludes"][1] = nlohmann::json::object(); + override_json["excludes"][1]["id"] = "11"; + + override_t override(override_json); + coll_mul_fields->add_override(override); + + std::vector facets = {"cast"}; + + Option res_op = coll_mul_fields->search("of", {"title"}, "", facets, {}, 0, 10); + ASSERT_TRUE(res_op.ok()); + nlohmann::json results = res_op.get(); + + ASSERT_EQ(3, results["hits"].size()); + ASSERT_EQ(3, results["found"].get()); + ASSERT_EQ(6, results["facet_counts"][0]["counts"].size()); + + ASSERT_STREQ("12", results["hits"][0]["document"]["id"].get().c_str()); + ASSERT_STREQ("5", results["hits"][1]["document"]["id"].get().c_str()); + ASSERT_STREQ("17", results["hits"][2]["document"]["id"].get().c_str()); + + // include + nlohmann::json override_json_include = { + {"id", "include-rule"}, + { + "rule", { + {"query", "in"}, + {"match", override_t::MATCH_EXACT} + } + } + }; + override_json_include["includes"] = nlohmann::json::array(); + override_json_include["includes"][0] = nlohmann::json::object(); + override_json_include["includes"][0]["id"] = "0"; + override_json_include["includes"][0]["position"] = 1; + + override_json_include["includes"][1] = nlohmann::json::object(); + override_json_include["includes"][1]["id"] = "3"; + override_json_include["includes"][1]["position"] = 2; + + override_t override_include(override_json_include); + + coll_mul_fields->add_override(override_include); + + res_op = coll_mul_fields->search("in", {"title"}, "", {}, {}, 0, 10); + ASSERT_TRUE(res_op.ok()); + results = res_op.get(); + + ASSERT_EQ(3, results["hits"].size()); + ASSERT_EQ(3, results["found"].get()); + + ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str()); + ASSERT_STREQ("3", results["hits"][1]["document"]["id"].get().c_str()); + ASSERT_STREQ("13", results["hits"][2]["document"]["id"].get().c_str()); + + coll_mul_fields->remove_override("exclude-rule"); + coll_mul_fields->remove_override("include-rule"); + + // contains cases + + nlohmann::json override_contains_inc = { + {"id", "include-rule"}, + { + "rule", { + {"query", "will"}, + {"match", override_t::MATCH_CONTAINS} + } + } + }; + override_contains_inc["includes"] = nlohmann::json::array(); + override_contains_inc["includes"][0] = nlohmann::json::object(); + override_contains_inc["includes"][0]["id"] = "0"; + override_contains_inc["includes"][0]["position"] = 1; + + override_contains_inc["includes"][1] = nlohmann::json::object(); + override_contains_inc["includes"][1]["id"] = "1"; + override_contains_inc["includes"][1]["position"] = 7; // purposely setting it way out + + override_t override_inc_contains(override_contains_inc); + coll_mul_fields->add_override(override_inc_contains); + + res_op = coll_mul_fields->search("will smith", {"title"}, "", {}, {}, 0, 10); + ASSERT_TRUE(res_op.ok()); + results = res_op.get(); + + ASSERT_EQ(4, results["hits"].size()); + ASSERT_EQ(4, results["found"].get()); + + ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str()); + ASSERT_STREQ("3", results["hits"][1]["document"]["id"].get().c_str()); + ASSERT_STREQ("2", results["hits"][2]["document"]["id"].get().c_str()); + ASSERT_STREQ("1", results["hits"][3]["document"]["id"].get().c_str()); + + coll_mul_fields->remove_override("include-rule"); +} + +TEST_F(CollectionOverrideTest, ExcludeIncludeFacetFilterQuery) { + // Check facet field highlight for overridden results + nlohmann::json override_json_include = { + {"id", "include-rule"}, + { + "rule", { + {"query", "not-found"}, + {"match", override_t::MATCH_EXACT} + } + } + }; + + override_json_include["includes"] = nlohmann::json::array(); + override_json_include["includes"][0] = nlohmann::json::object(); + override_json_include["includes"][0]["id"] = "0"; + override_json_include["includes"][0]["position"] = 1; + + override_json_include["includes"][1] = nlohmann::json::object(); + override_json_include["includes"][1]["id"] = "2"; + override_json_include["includes"][1]["position"] = 2; + + override_t override_include(override_json_include); + coll_mul_fields->add_override(override_include); + + auto results = coll_mul_fields->search("not-found", {"title"}, "", {"starring"}, {}, 0, 10, 1, FREQUENCY, + false, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, 500, "starring: will").get(); + + ASSERT_EQ("Will Ferrell", results["facet_counts"][0]["counts"][0]["highlighted"].get()); + ASSERT_EQ("Will Ferrell", results["facet_counts"][0]["counts"][0]["value"].get()); + ASSERT_EQ(1, results["facet_counts"][0]["counts"][0]["count"].get()); + + coll_mul_fields->remove_override("include-rule"); + + // facet count is okay when results are excluded + nlohmann::json override_json_exclude = { + {"id", "exclude-rule"}, + { + "rule", { + {"query", "the"}, + {"match", override_t::MATCH_EXACT} + } + } + }; + override_json_exclude["excludes"] = nlohmann::json::array(); + override_json_exclude["excludes"][0] = nlohmann::json::object(); + override_json_exclude["excludes"][0]["id"] = "10"; + + override_t override(override_json_exclude); + coll_mul_fields->add_override(override); + + results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 10, 1, FREQUENCY, + false, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, 500, "starring: scott").get(); + + // "count" would be `2` without exclusion + ASSERT_EQ("Scott Glenn", results["facet_counts"][0]["counts"][0]["highlighted"].get()); + ASSERT_EQ(1, results["facet_counts"][0]["counts"][0]["count"].get()); + + ASSERT_EQ("Kristin Scott Thomas", results["facet_counts"][0]["counts"][1]["highlighted"].get()); + ASSERT_EQ(1, results["facet_counts"][0]["counts"][1]["count"].get()); + + // ensure max_hits is respected + // first with max_hits = 0 + results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 10, 1, FREQUENCY, + false, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, 0, "starring: scott").get(); + + ASSERT_EQ(10, results["found"].get()); + ASSERT_EQ(0, results["hits"].size()); + + coll_mul_fields->remove_override("exclude-rule"); + + // now with max_hits = 1, and an include query + + coll_mul_fields->add_override(override_include); + results = coll_mul_fields->search("not-found", {"title"}, "", {"starring"}, {}, 0, 10, 1, FREQUENCY, + false, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, 1, "").get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_EQ(1, results["hits"].size()); + ASSERT_EQ("0", results["hits"][0]["document"]["id"].get()); + + coll_mul_fields->remove_override("include-rule"); +} \ No newline at end of file diff --git a/test/collection_sorting_test.cpp b/test/collection_sorting_test.cpp index 9004d6f7..ed275529 100644 --- a/test/collection_sorting_test.cpp +++ b/test/collection_sorting_test.cpp @@ -29,7 +29,6 @@ protected: } virtual void TearDown() { - collectionManager.drop_collection("collection"); delete store; } }; diff --git a/test/collection_test.cpp b/test/collection_test.cpp index ed250b88..5c0be7dc 100644 --- a/test/collection_test.cpp +++ b/test/collection_test.cpp @@ -865,142 +865,6 @@ TEST_F(CollectionTest, MultipleFields) { collectionManager.drop_collection("coll_mul_fields"); } -TEST_F(CollectionTest, ExcludeIncludeExactQueryMatch) { - Collection *coll_mul_fields; - - std::ifstream infile(std::string(ROOT_DIR)+"test/multi_field_documents.jsonl"); - std::vector fields = { - field("title", field_types::STRING, false), - field("starring", field_types::STRING, false), - field("cast", field_types::STRING_ARRAY, true), - field("points", field_types::INT32, false) - }; - - coll_mul_fields = collectionManager.get_collection("coll_mul_fields"); - if(coll_mul_fields == nullptr) { - coll_mul_fields = collectionManager.create_collection("coll_mul_fields", fields, "points").get(); - } - - std::string json_line; - - while (std::getline(infile, json_line)) { - coll_mul_fields->add(json_line); - } - - infile.close(); - - // with override - nlohmann::json override_json = { - {"id", "exclude-rule"}, - { - "rule", { - {"query", "of"}, - {"match", override_t::MATCH_EXACT} - } - } - }; - override_json["excludes"] = nlohmann::json::array(); - override_json["excludes"][0] = nlohmann::json::object(); - override_json["excludes"][0]["id"] = "4"; - - override_json["excludes"][1] = nlohmann::json::object(); - override_json["excludes"][1]["id"] = "11"; - - override_t override(override_json); - - coll_mul_fields->add_override(override); - - std::vector facets = {"cast"}; - - Option res_op = coll_mul_fields->search("of", {"title"}, "", facets, sort_fields, 0, 10); - ASSERT_TRUE(res_op.ok()); - nlohmann::json results = res_op.get(); - - ASSERT_EQ(3, results["hits"].size()); - ASSERT_EQ(3, results["found"].get()); - ASSERT_EQ(6, results["facet_counts"][0]["counts"].size()); - - ASSERT_STREQ("12", results["hits"][0]["document"]["id"].get().c_str()); - ASSERT_STREQ("5", results["hits"][1]["document"]["id"].get().c_str()); - ASSERT_STREQ("17", results["hits"][2]["document"]["id"].get().c_str()); - - // include - nlohmann::json override_json_include = { - {"id", "include-rule"}, - { - "rule", { - {"query", "in"}, - {"match", override_t::MATCH_EXACT} - } - } - }; - override_json_include["includes"] = nlohmann::json::array(); - override_json_include["includes"][0] = nlohmann::json::object(); - override_json_include["includes"][0]["id"] = "0"; - override_json_include["includes"][0]["position"] = 1; - - override_json_include["includes"][1] = nlohmann::json::object(); - override_json_include["includes"][1]["id"] = "3"; - override_json_include["includes"][1]["position"] = 2; - - override_t override_include(override_json_include); - - coll_mul_fields->add_override(override_include); - - res_op = coll_mul_fields->search("in", {"title"}, "", {}, sort_fields, 0, 10); - ASSERT_TRUE(res_op.ok()); - results = res_op.get(); - - ASSERT_EQ(3, results["hits"].size()); - ASSERT_EQ(3, results["found"].get()); - - ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str()); - ASSERT_STREQ("3", results["hits"][1]["document"]["id"].get().c_str()); - ASSERT_STREQ("13", results["hits"][2]["document"]["id"].get().c_str()); - - coll_mul_fields->remove_override("exclude-rule"); - coll_mul_fields->remove_override("include-rule"); - - // contains cases - - nlohmann::json override_contains_inc = { - {"id", "include-rule"}, - { - "rule", { - {"query", "will"}, - {"match", override_t::MATCH_CONTAINS} - } - } - }; - override_contains_inc["includes"] = nlohmann::json::array(); - override_contains_inc["includes"][0] = nlohmann::json::object(); - override_contains_inc["includes"][0]["id"] = "0"; - override_contains_inc["includes"][0]["position"] = 1; - - override_contains_inc["includes"][1] = nlohmann::json::object(); - override_contains_inc["includes"][1]["id"] = "1"; - override_contains_inc["includes"][1]["position"] = 7; // purposely setting it way out - - override_t override_inc_contains(override_contains_inc); - coll_mul_fields->add_override(override_inc_contains); - - res_op = coll_mul_fields->search("will smith", {"title"}, "", {}, sort_fields, 0, 10); - ASSERT_TRUE(res_op.ok()); - results = res_op.get(); - - ASSERT_EQ(4, results["hits"].size()); - ASSERT_EQ(4, results["found"].get()); - - ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str()); - ASSERT_STREQ("3", results["hits"][1]["document"]["id"].get().c_str()); - ASSERT_STREQ("2", results["hits"][2]["document"]["id"].get().c_str()); - ASSERT_STREQ("1", results["hits"][3]["document"]["id"].get().c_str()); - - coll_mul_fields->remove_override("include-rule"); - - collectionManager.drop_collection("coll_mul_fields"); -} - TEST_F(CollectionTest, FilterAndQueryFieldRestrictions) { Collection *coll_mul_fields;