mirror of
https://github.com/typesense/typesense.git
synced 2025-05-22 23:06:30 +08:00
Add override level option to filter curated hits.
This commit is contained in:
parent
9197627e81
commit
39f12edd3d
@ -218,7 +218,8 @@ private:
|
||||
const std::map<size_t, std::vector<std::string>>& pinned_hits,
|
||||
const std::vector<std::string>& hidden_hits,
|
||||
std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
|
||||
std::vector<uint32_t>& excluded_ids, std::vector<const override_t*>& filter_overrides) const;
|
||||
std::vector<uint32_t>& excluded_ids, std::vector<const override_t*>& filter_overrides,
|
||||
bool& filter_curated_hits) const;
|
||||
|
||||
Option<bool> check_and_update_schema(nlohmann::json& document, const DIRTY_VALUES& dirty_values);
|
||||
|
||||
@ -408,7 +409,7 @@ public:
|
||||
const size_t max_extra_prefix = INT16_MAX,
|
||||
const size_t max_extra_suffix = INT16_MAX,
|
||||
const size_t facet_query_num_typos = 2,
|
||||
const bool filter_curated_hits = false) const;
|
||||
const size_t filter_curated_hits_option = 2) const;
|
||||
|
||||
Option<bool> get_filter_ids(const std::string & simple_filter_query,
|
||||
std::vector<std::pair<size_t, uint32_t*>>& index_ids);
|
||||
|
@ -89,6 +89,7 @@ struct override_t {
|
||||
|
||||
std::string filter_by;
|
||||
bool remove_matched_tokens = false;
|
||||
bool filter_curated_hits = false;
|
||||
|
||||
override_t() = default;
|
||||
|
||||
@ -172,6 +173,12 @@ struct override_t {
|
||||
}
|
||||
}
|
||||
|
||||
if(override_json.count("filter_curated_hits") != 0) {
|
||||
if (!override_json["filter_curated_hits"].is_boolean()) {
|
||||
return Option<bool>(400, "The `filter_curated_hits` must be a boolean.");
|
||||
}
|
||||
}
|
||||
|
||||
if(!id.empty()) {
|
||||
override.id = id;
|
||||
} else if(override_json.count("id") != 0) {
|
||||
@ -210,6 +217,10 @@ struct override_t {
|
||||
override.remove_matched_tokens = (override_json.count("filter_by") != 0);
|
||||
}
|
||||
|
||||
if(override_json.count("filter_curated_hits") != 0) {
|
||||
override.filter_curated_hits = override_json["filter_curated_hits"].get<bool>();
|
||||
}
|
||||
|
||||
// we have to also detect if it is a dynamic query rule
|
||||
size_t i = 0;
|
||||
while(i < override.rule.query.size()) {
|
||||
|
@ -400,7 +400,8 @@ void Collection::curate_results(string& actual_query, bool enable_overrides, boo
|
||||
const std::vector<std::string>& hidden_hits,
|
||||
std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
|
||||
std::vector<uint32_t>& excluded_ids,
|
||||
std::vector<const override_t*>& filter_overrides) const {
|
||||
std::vector<const override_t*>& filter_overrides,
|
||||
bool& filter_curated_hits) const {
|
||||
|
||||
std::set<uint32_t> excluded_set;
|
||||
|
||||
@ -465,6 +466,8 @@ void Collection::curate_results(string& actual_query, bool enable_overrides, boo
|
||||
|
||||
actual_query = query;
|
||||
}
|
||||
|
||||
filter_curated_hits = override.filter_curated_hits;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -699,7 +702,7 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query, const s
|
||||
const size_t max_extra_prefix,
|
||||
const size_t max_extra_suffix,
|
||||
const size_t facet_query_num_typos,
|
||||
const bool filter_curated_hits) const {
|
||||
const size_t filter_curated_hits_option) const {
|
||||
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
@ -934,8 +937,14 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query, const s
|
||||
|
||||
std::vector<const override_t*> filter_overrides;
|
||||
std::string query = raw_query;
|
||||
bool filter_curated_hits;
|
||||
curate_results(query, enable_overrides, pre_segmented_query, pinned_hits, hidden_hits,
|
||||
included_ids, excluded_ids, filter_overrides);
|
||||
included_ids, excluded_ids, filter_overrides, filter_curated_hits);
|
||||
|
||||
if(filter_curated_hits_option == 0 || filter_curated_hits_option == 1) {
|
||||
// When the query param has an explicit value (0 or 1), it takes precedence over the override-level configuration.
|
||||
filter_curated_hits = bool(filter_curated_hits_option);
|
||||
}
|
||||
|
||||
/*for(auto& kv: included_ids) {
|
||||
LOG(INFO) << "key: " << kv.first;
|
||||
|
@ -705,7 +705,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
bool prioritize_exact_match = true;
|
||||
bool pre_segmented_query = false;
|
||||
bool enable_overrides = true;
|
||||
bool filter_curated_hits = false;
|
||||
size_t filter_curated_hits_option = 2;
|
||||
std::string highlight_fields;
|
||||
bool exhaustive_search = false;
|
||||
size_t search_stop_millis;
|
||||
@ -733,6 +733,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
{MAX_EXTRA_SUFFIX, &max_extra_suffix},
|
||||
{MAX_CANDIDATES, &max_candidates},
|
||||
{FACET_QUERY_NUM_TYPOS, &facet_query_num_typos},
|
||||
{FILTER_CURATED_HITS, &filter_curated_hits_option},
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, std::string*> str_values = {
|
||||
@ -752,7 +753,6 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
{EXHAUSTIVE_SEARCH, &exhaustive_search},
|
||||
{SPLIT_JOIN_TOKENS, &split_join_tokens},
|
||||
{ENABLE_OVERRIDES, &enable_overrides},
|
||||
{FILTER_CURATED_HITS, &filter_curated_hits},
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, std::vector<std::string>*> str_list_values = {
|
||||
@ -898,7 +898,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
max_extra_prefix,
|
||||
max_extra_suffix,
|
||||
facet_query_num_typos,
|
||||
filter_curated_hits
|
||||
filter_curated_hits_option
|
||||
);
|
||||
|
||||
uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
|
@ -298,6 +298,109 @@ TEST_F(CollectionOverrideTest, OverrideJSONValidation) {
|
||||
ASSERT_STREQ("The `excludes` value must be an array of objects.", parse_op.error().c_str());
|
||||
}
|
||||
|
||||
TEST_F(CollectionOverrideTest, IncludeHitsFilterOverrides) {
|
||||
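// Check that the override-level `filter_curated_hits` flag, and the search param that takes precedence over it, control whether curated hits are subject to the filter query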
|
||||
nlohmann::json override_json_include = {
|
||||
{"id", "include-rule"},
|
||||
{
|
||||
"rule", {
|
||||
{"query", "not-found"},
|
||||
{"match", override_t::MATCH_EXACT}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
override_json_include["includes"] = nlohmann::json::array();
|
||||
override_json_include["includes"][0] = nlohmann::json::object();
|
||||
override_json_include["includes"][0]["id"] = "0";
|
||||
override_json_include["includes"][0]["position"] = 1;
|
||||
|
||||
override_json_include["includes"][1] = nlohmann::json::object();
|
||||
override_json_include["includes"][1]["id"] = "2";
|
||||
override_json_include["includes"][1]["position"] = 2;
|
||||
|
||||
override_json_include["filter_curated_hits"] = true;
|
||||
|
||||
override_t override_include;
|
||||
override_t::parse(override_json_include, "", override_include);
|
||||
coll_mul_fields->add_override(override_include);
|
||||
|
||||
auto results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY,
|
||||
{false}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will").get();
|
||||
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
// disable filter curation option
|
||||
override_json_include["filter_curated_hits"] = false;
|
||||
override_t::parse(override_json_include, "", override_include);
|
||||
coll_mul_fields->add_override(override_include);
|
||||
results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY,
|
||||
{false}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will").get();
|
||||
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
// remove filter curation option: by default no filtering should be done
|
||||
override_json_include.erase("filter_curated_hits");
|
||||
override_t::parse(override_json_include, "", override_include);
|
||||
coll_mul_fields->add_override(override_include);
|
||||
results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY,
|
||||
{false}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will").get();
|
||||
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
// query param configuration should take precedence over override level config
|
||||
results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY,
|
||||
{false}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "",
|
||||
30, 5,
|
||||
"", 10, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true,
|
||||
4, {off}, 32767, 32767, 2, 1).get();
|
||||
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
// try disabling and overriding
|
||||
|
||||
override_json_include["filter_curated_hits"] = false;
|
||||
override_t::parse(override_json_include, "", override_include);
|
||||
coll_mul_fields->add_override(override_include);
|
||||
|
||||
results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY,
|
||||
{false}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "",
|
||||
30, 5,
|
||||
"", 10, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true,
|
||||
4, {off}, 32767, 32767, 2, 1).get();
|
||||
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
// try enabling and overriding
|
||||
override_json_include["filter_curated_hits"] = true;
|
||||
override_t::parse(override_json_include, "", override_include);
|
||||
coll_mul_fields->add_override(override_include);
|
||||
|
||||
results = coll_mul_fields->search("not-found", {"title"}, "points:>70", {"starring"}, {}, {0}, 10, 1, FREQUENCY,
|
||||
{false}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "",
|
||||
30, 5,
|
||||
"", 10, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true,
|
||||
4, {off}, 32767, 32767, 2, 0).get();
|
||||
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
}
|
||||
|
||||
TEST_F(CollectionOverrideTest, ExcludeIncludeFacetFilterQuery) {
|
||||
// Check facet field highlight for overridden results
|
||||
nlohmann::json override_json_include = {
|
||||
@ -449,7 +552,7 @@ TEST_F(CollectionOverrideTest, IncludeExcludeHitsQuery) {
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
|
||||
"", 10, pinned_hits, {}, {}, 0,
|
||||
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, true,
|
||||
4, {off}, 32767, 32767, 2, true).get();
|
||||
4, {off}, 32767, 32767, 2, 1).get();
|
||||
|
||||
ASSERT_EQ(4, results["found"].get<size_t>());
|
||||
ASSERT_STREQ("14", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
|
Loading…
x
Reference in New Issue
Block a user