Add override level option to filter curated hits.

This commit is contained in:
Kishore Nallan 2022-03-13 20:53:38 +05:30
parent 9197627e81
commit 39f12edd3d
5 changed files with 133 additions and 9 deletions

View File

@ -218,7 +218,8 @@ private:
const std::map<size_t, std::vector<std::string>>& pinned_hits,
const std::vector<std::string>& hidden_hits,
std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
std::vector<uint32_t>& excluded_ids, std::vector<const override_t*>& filter_overrides) const;
std::vector<uint32_t>& excluded_ids, std::vector<const override_t*>& filter_overrides,
bool& filter_curated_hits) const;
Option<bool> check_and_update_schema(nlohmann::json& document, const DIRTY_VALUES& dirty_values);
@ -408,7 +409,7 @@ public:
const size_t max_extra_prefix = INT16_MAX,
const size_t max_extra_suffix = INT16_MAX,
const size_t facet_query_num_typos = 2,
const bool filter_curated_hits = false) const;
const size_t filter_curated_hits_option = 2) const;
Option<bool> get_filter_ids(const std::string & simple_filter_query,
std::vector<std::pair<size_t, uint32_t*>>& index_ids);

View File

@ -89,6 +89,7 @@ struct override_t {
std::string filter_by;
bool remove_matched_tokens = false;
bool filter_curated_hits = false;
override_t() = default;
@ -172,6 +173,12 @@ struct override_t {
}
}
if(override_json.count("filter_curated_hits") != 0) {
if (!override_json["filter_curated_hits"].is_boolean()) {
return Option<bool>(400, "The `filter_curated_hits` must be a boolean.");
}
}
if(!id.empty()) {
override.id = id;
} else if(override_json.count("id") != 0) {
@ -210,6 +217,10 @@ struct override_t {
override.remove_matched_tokens = (override_json.count("filter_by") != 0);
}
if(override_json.count("filter_curated_hits") != 0) {
override.filter_curated_hits = override_json["filter_curated_hits"].get<bool>();
}
// we have to also detect if it is a dynamic query rule
size_t i = 0;
while(i < override.rule.query.size()) {

View File

@ -400,7 +400,8 @@ void Collection::curate_results(string& actual_query, bool enable_overrides, boo
const std::vector<std::string>& hidden_hits,
std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
std::vector<uint32_t>& excluded_ids,
std::vector<const override_t*>& filter_overrides) const {
std::vector<const override_t*>& filter_overrides,
bool& filter_curated_hits) const {
std::set<uint32_t> excluded_set;
@ -465,6 +466,8 @@ void Collection::curate_results(string& actual_query, bool enable_overrides, boo
actual_query = query;
}
filter_curated_hits = override.filter_curated_hits;
}
}
}
@ -699,7 +702,7 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query, const s
const size_t max_extra_prefix,
const size_t max_extra_suffix,
const size_t facet_query_num_typos,
const bool filter_curated_hits) const {
const size_t filter_curated_hits_option) const {
std::shared_lock lock(mutex);
@ -934,8 +937,14 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query, const s
std::vector<const override_t*> filter_overrides;
std::string query = raw_query;
// Must start out as `false`: curate_results() assigns this flag only from a matching override,
// and the query-param check below leaves it untouched when the option is neither 0 nor 1.
// Without an initializer the flag could be read with an indeterminate value.
bool filter_curated_hits = false;
curate_results(query, enable_overrides, pre_segmented_query, pinned_hits, hidden_hits,
included_ids, excluded_ids, filter_overrides);
included_ids, excluded_ids, filter_overrides, filter_curated_hits);
if(filter_curated_hits_option == 0 || filter_curated_hits_option == 1) {
// When query param has explicit value set, override level configuration takes lower precedence.
filter_curated_hits = bool(filter_curated_hits_option);
}
/*for(auto& kv: included_ids) {
LOG(INFO) << "key: " << kv.first;

View File

@ -705,7 +705,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
bool prioritize_exact_match = true;
bool pre_segmented_query = false;
bool enable_overrides = true;
bool filter_curated_hits = false;
size_t filter_curated_hits_option = 2;
std::string highlight_fields;
bool exhaustive_search = false;
size_t search_stop_millis;
@ -733,6 +733,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
{MAX_EXTRA_SUFFIX, &max_extra_suffix},
{MAX_CANDIDATES, &max_candidates},
{FACET_QUERY_NUM_TYPOS, &facet_query_num_typos},
{FILTER_CURATED_HITS, &filter_curated_hits_option},
};
std::unordered_map<std::string, std::string*> str_values = {
@ -752,7 +753,6 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
{EXHAUSTIVE_SEARCH, &exhaustive_search},
{SPLIT_JOIN_TOKENS, &split_join_tokens},
{ENABLE_OVERRIDES, &enable_overrides},
{FILTER_CURATED_HITS, &filter_curated_hits},
};
std::unordered_map<std::string, std::vector<std::string>*> str_list_values = {
@ -898,7 +898,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
max_extra_prefix,
max_extra_suffix,
facet_query_num_typos,
filter_curated_hits
filter_curated_hits_option
);
uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(

View File

@ -298,6 +298,109 @@ TEST_F(CollectionOverrideTest, OverrideJSONValidation) {
ASSERT_STREQ("The `excludes` value must be an array of objects.", parse_op.error().c_str());
}
// Exercises the interaction between the override-level `filter_curated_hits` flag and the
// search-level `filter_curated_hits_option` argument (the last positional argument passed to
// `search` in the longer calls below) when an override pins documents and the search also
// carries a `filter_by` clause.
TEST_F(CollectionOverrideTest, IncludeHitsFilterOverrides) {
// Override matching the exact query "not-found": pins doc "0" at position 1 and doc "2" at position 2.
nlohmann::json override_json_include = {
{"id", "include-rule"},
{
"rule", {
{"query", "not-found"},
{"match", override_t::MATCH_EXACT}
}
}
};
override_json_include["includes"] = nlohmann::json::array();
override_json_include["includes"][0] = nlohmann::json::object();
override_json_include["includes"][0]["id"] = "0";
override_json_include["includes"][0]["position"] = 1;
override_json_include["includes"][1] = nlohmann::json::object();
override_json_include["includes"][1]["id"] = "2";
override_json_include["includes"][1]["position"] = 2;
// Override-level flag enabled: the `points:>70` filter is expected to apply to the pinned hits too.
override_json_include["filter_curated_hits"] = true;
override_t override_include;
// NOTE(review): the result of `override_t::parse` is not checked here or in the calls below.
override_t::parse(override_json_include, "", override_include);
coll_mul_fields->add_override(override_include);
auto results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY,
{false}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "starring: will").get();
// Presumably only one of the two pinned docs satisfies `points:>70` in the fixture data.
ASSERT_EQ(1, results["hits"].size());
// disable filter curation option
// (the override is re-added under the same id; presumably this replaces the earlier one)
override_json_include["filter_curated_hits"] = false;
override_t::parse(override_json_include, "", override_include);
coll_mul_fields->add_override(override_include);
results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY,
{false}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "starring: will").get();
// Both pinned docs are returned when curated hits are not filtered.
ASSERT_EQ(2, results["hits"].size());
// remove filter curation option: by default no filtering should be done
override_json_include.erase("filter_curated_hits");
override_t::parse(override_json_include, "", override_include);
coll_mul_fields->add_override(override_include);
results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY,
{false}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "starring: will").get();
ASSERT_EQ(2, results["hits"].size());
// query param configuration should take precedence over override level config
// (override has no `filter_curated_hits` key at this point; the trailing `1` enables filtering per request)
results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY,
{false}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "",
30, 5,
"", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true,
4, {off}, 32767, 32767, 2, 1).get();
ASSERT_EQ(1, results["hits"].size());
// try disabling and overriding
// (override says `false`, request says `1`: the request value is expected to win)
override_json_include["filter_curated_hits"] = false;
override_t::parse(override_json_include, "", override_include);
coll_mul_fields->add_override(override_include);
results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY,
{false}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "",
30, 5,
"", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true,
4, {off}, 32767, 32767, 2, 1).get();
ASSERT_EQ(1, results["hits"].size());
// try enabling and overriding
// (override says `true`, request says `0`: the request value is expected to win)
override_json_include["filter_curated_hits"] = true;
override_t::parse(override_json_include, "", override_include);
coll_mul_fields->add_override(override_include);
results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY,
{false}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "",
30, 5,
"", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true,
4, {off}, 32767, 32767, 2, 0).get();
ASSERT_EQ(2, results["hits"].size());
}
TEST_F(CollectionOverrideTest, ExcludeIncludeFacetFilterQuery) {
// Check facet field highlight for overridden results
nlohmann::json override_json_include = {
@ -449,7 +552,7 @@ TEST_F(CollectionOverrideTest, IncludeExcludeHitsQuery) {
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, pinned_hits, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true,
4, {off}, 32767, 32767, 2, true).get();
4, {off}, 32767, 32767, 2, 1).get();
ASSERT_EQ(4, results["found"].get<size_t>());
ASSERT_STREQ("14", results["hits"][0]["document"]["id"].get<std::string>().c_str());