From 39f12edd3d2588456f9b9fa1418a003767410710 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Sun, 13 Mar 2022 20:53:38 +0530 Subject: [PATCH] Add override level option to filter curated hits. --- include/collection.h | 5 +- include/index.h | 11 ++++ src/collection.cpp | 15 ++++- src/collection_manager.cpp | 6 +- test/collection_override_test.cpp | 105 +++++++++++++++++++++++++++++- 5 files changed, 133 insertions(+), 9 deletions(-) diff --git a/include/collection.h b/include/collection.h index e54eed0c..6e0c4ee0 100644 --- a/include/collection.h +++ b/include/collection.h @@ -218,7 +218,8 @@ private: const std::map>& pinned_hits, const std::vector& hidden_hits, std::vector>& included_ids, - std::vector& excluded_ids, std::vector& filter_overrides) const; + std::vector& excluded_ids, std::vector& filter_overrides, + bool& filter_curated_hits) const; Option check_and_update_schema(nlohmann::json& document, const DIRTY_VALUES& dirty_values); @@ -408,7 +409,7 @@ public: const size_t max_extra_prefix = INT16_MAX, const size_t max_extra_suffix = INT16_MAX, const size_t facet_query_num_typos = 2, - const bool filter_curated_hits = false) const; + const size_t filter_curated_hits_option = 2) const; Option get_filter_ids(const std::string & simple_filter_query, std::vector>& index_ids); diff --git a/include/index.h b/include/index.h index 497bd5af..b8f7cd0a 100644 --- a/include/index.h +++ b/include/index.h @@ -89,6 +89,7 @@ struct override_t { std::string filter_by; bool remove_matched_tokens = false; + bool filter_curated_hits = false; override_t() = default; @@ -172,6 +173,12 @@ struct override_t { } } + if(override_json.count("filter_curated_hits") != 0) { + if (!override_json["filter_curated_hits"].is_boolean()) { + return Option(400, "The `filter_curated_hits` must be a boolean."); + } + } + if(!id.empty()) { override.id = id; } else if(override_json.count("id") != 0) { @@ -210,6 +217,10 @@ struct override_t { override.remove_matched_tokens = (override_json.count("filter_by") != 0); } + if(override_json.count("filter_curated_hits") != 0) { + override.filter_curated_hits = override_json["filter_curated_hits"].get(); + } + // we have to also detect if it is a dynamic query rule size_t i = 0; while(i < override.rule.query.size()) { diff --git a/src/collection.cpp b/src/collection.cpp index 7cd2faf4..1e702a4f 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -400,7 +400,8 @@ void Collection::curate_results(string& actual_query, bool enable_overrides, boo const std::vector& hidden_hits, std::vector>& included_ids, std::vector& excluded_ids, - std::vector& filter_overrides) const { + std::vector& filter_overrides, + bool& filter_curated_hits) const { std::set excluded_set; @@ -465,6 +466,8 @@ void Collection::curate_results(string& actual_query, bool enable_overrides, boo actual_query = query; } + + filter_curated_hits = override.filter_curated_hits; } } } @@ -699,7 +702,7 @@ Option Collection::search(const std::string & raw_query, const s const size_t max_extra_prefix, const size_t max_extra_suffix, const size_t facet_query_num_typos, - const bool filter_curated_hits) const { + const size_t filter_curated_hits_option) const { std::shared_lock lock(mutex); @@ -934,8 +937,14 @@ Option Collection::search(const std::string & raw_query, const s std::vector filter_overrides; std::string query = raw_query; + bool filter_curated_hits; curate_results(query, enable_overrides, pre_segmented_query, pinned_hits, hidden_hits, - included_ids, excluded_ids, filter_overrides); + included_ids, excluded_ids, filter_overrides, filter_curated_hits); + + if(filter_curated_hits_option == 0 || filter_curated_hits_option == 1) { + // When query param has explicit value set, override level configuration takes lower precedence. + filter_curated_hits = bool(filter_curated_hits_option); + } /*for(auto& kv: included_ids) { LOG(INFO) << "key: " << kv.first; diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp index 0eeacbb3..3e43f658 100644 --- a/src/collection_manager.cpp +++ b/src/collection_manager.cpp @@ -705,7 +705,7 @@ Option CollectionManager::do_search(std::map& re bool prioritize_exact_match = true; bool pre_segmented_query = false; bool enable_overrides = true; - bool filter_curated_hits = false; + size_t filter_curated_hits_option = 2; std::string highlight_fields; bool exhaustive_search = false; size_t search_stop_millis; @@ -733,6 +733,7 @@ Option CollectionManager::do_search(std::map& re {MAX_EXTRA_SUFFIX, &max_extra_suffix}, {MAX_CANDIDATES, &max_candidates}, {FACET_QUERY_NUM_TYPOS, &facet_query_num_typos}, + {FILTER_CURATED_HITS, &filter_curated_hits_option}, }; std::unordered_map str_values = { @@ -752,7 +753,6 @@ Option CollectionManager::do_search(std::map& re {EXHAUSTIVE_SEARCH, &exhaustive_search}, {SPLIT_JOIN_TOKENS, &split_join_tokens}, {ENABLE_OVERRIDES, &enable_overrides}, - {FILTER_CURATED_HITS, &filter_curated_hits}, }; std::unordered_map*> str_list_values = { @@ -898,7 +898,7 @@ Option CollectionManager::do_search(std::map& re max_extra_prefix, max_extra_suffix, facet_query_num_typos, - filter_curated_hits + filter_curated_hits_option ); uint64_t timeMillis = std::chrono::duration_cast( diff --git a/test/collection_override_test.cpp b/test/collection_override_test.cpp index d2d5f2a3..acd7097e 100644 --- a/test/collection_override_test.cpp +++ b/test/collection_override_test.cpp @@ -298,6 +298,109 @@ TEST_F(CollectionOverrideTest, OverrideJSONValidation) { ASSERT_STREQ("The `excludes` value must be an array of objects.", parse_op.error().c_str()); } +TEST_F(CollectionOverrideTest, IncludeHitsFilterOverrides) { + // Check facet field highlight for overridden results + nlohmann::json override_json_include = { + {"id", "include-rule"}, + { + "rule", { + {"query", "not-found"}, + {"match", override_t::MATCH_EXACT} + } + } + }; + + override_json_include["includes"] = nlohmann::json::array(); + override_json_include["includes"][0] = nlohmann::json::object(); + override_json_include["includes"][0]["id"] = "0"; + override_json_include["includes"][0]["position"] = 1; + + override_json_include["includes"][1] = nlohmann::json::object(); + override_json_include["includes"][1]["id"] = "2"; + override_json_include["includes"][1]["position"] = 2; + + override_json_include["filter_curated_hits"] = true; + + override_t override_include; + override_t::parse(override_json_include, "", override_include); + coll_mul_fields->add_override(override_include); + + auto results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY, + {false}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "starring: will").get(); + + ASSERT_EQ(1, results["hits"].size()); + + // disable filter curation option + override_json_include["filter_curated_hits"] = false; + override_t::parse(override_json_include, "", override_include); + coll_mul_fields->add_override(override_include); + results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY, + {false}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "starring: will").get(); + + ASSERT_EQ(2, results["hits"].size()); + + // remove filter curation option: by default no filtering should be done + override_json_include.erase("filter_curated_hits"); + override_t::parse(override_json_include, "", override_include); + coll_mul_fields->add_override(override_include); + results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY, + {false}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "starring: will").get(); + + ASSERT_EQ(2, results["hits"].size()); + + // query param configuration should take precedence over override level config + results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY, + {false}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", + 30, 5, + "", 10, {}, {}, {}, 0, + "", "", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true, + 4, {off}, 32767, 32767, 2, 1).get(); + + ASSERT_EQ(1, results["hits"].size()); + + // try disabling and overriding + + override_json_include["filter_curated_hits"] = false; + override_t::parse(override_json_include, "", override_include); + coll_mul_fields->add_override(override_include); + + results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY, + {false}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", + 30, 5, + "", 10, {}, {}, {}, 0, + "", "", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true, + 4, {off}, 32767, 32767, 2, 1).get(); + + ASSERT_EQ(1, results["hits"].size()); + + // try enabling and overriding + override_json_include["filter_curated_hits"] = true; + override_t::parse(override_json_include, "", override_include); + coll_mul_fields->add_override(override_include); + + results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY, + {false}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", + 30, 5, + "", 10, {}, {}, {}, 0, + "", "", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true, + 4, {off}, 32767, 32767, 2, 0).get(); + + ASSERT_EQ(2, results["hits"].size()); + +} + TEST_F(CollectionOverrideTest, ExcludeIncludeFacetFilterQuery) { // Check facet field highlight for overridden results nlohmann::json override_json_include = { @@ -449,7 +552,7 @@ TEST_F(CollectionOverrideTest, IncludeExcludeHitsQuery) { spp::sparse_hash_set(), 10, "", 30, 5, "", 10, pinned_hits, {}, {}, 0, "", "", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true, - 4, {off}, 32767, 32767, 2, true).get(); + 4, {off}, 32767, 32767, 2, 1).get(); ASSERT_EQ(4, results["found"].get()); ASSERT_STREQ("14", results["hits"][0]["document"]["id"].get().c_str());