diff --git a/include/index.h b/include/index.h index b9a2ca6b..f75f83b2 100644 --- a/include/index.h +++ b/include/index.h @@ -26,6 +26,7 @@ #include #include "id_list.h" #include "synonym_index.h" +#include "override.h" static constexpr size_t ARRAY_FACET_DIM = 4; using facet_map_t = spp::sparse_hash_map; @@ -78,277 +79,6 @@ struct query_tokens_t { std::vector> q_synonyms; }; -struct override_t { - static const std::string MATCH_EXACT; - static const std::string MATCH_CONTAINS; - - struct rule_t { - std::string query; - std::string match; - bool dynamic_query = false; - }; - - struct add_hit_t { - std::string doc_id; - uint32_t position = 0; - }; - - struct drop_hit_t { - std::string doc_id; - }; - - std::string id; - - rule_t rule; - std::vector add_hits; - std::vector drop_hits; - - std::string filter_by; - bool remove_matched_tokens = false; - bool filter_curated_hits = false; - - bool stop_processing = true; - - std::string sort_by; - std::string replace_query; - - // epoch seconds - int64_t effective_from_ts = -1; - int64_t effective_to_ts = -1; - - override_t() = default; - - static Option parse(const nlohmann::json& override_json, const std::string& id, override_t& override) { - if(!override_json.is_object()) { - return Option(400, "Bad JSON."); - } - - if(override_json.count("rule") == 0 || !override_json["rule"].is_object()) { - return Option(400, "Missing `rule` definition."); - } - - if(override_json["rule"].count("query") == 0 || override_json["rule"].count("match") == 0) { - return Option(400, "The `rule` definition must contain a `query` and `match`."); - } - - if(override_json.count("includes") == 0 && override_json.count("excludes") == 0 && - override_json.count("filter_by") == 0 && override_json.count("sort_by") == 0 && - override_json.count("remove_matched_tokens") == 0 && - override_json.count("replace_query") == 0) { - return Option(400, "Must contain one of: `includes`, `excludes`, " - "`filter_by`, `sort_by`, `remove_matched_tokens`, `replace_query`."); - } - - if(override_json.count("includes") != 0) { - if(!override_json["includes"].is_array()) { - return Option(400, "The `includes` value must be an array."); - } - - for(const auto & include_obj: override_json["includes"]) { - if(!include_obj.is_object()) { - return Option(400, "The `includes` value must be an array of objects."); - } - - if(include_obj.count("id") == 0 || include_obj.count("position") == 0) { - return Option(400, "Inclusion definition must define both `id` and `position` keys."); - } - - if(!include_obj["id"].is_string()) { - return Option(400, "Inclusion `id` must be a string."); - } - - if(!include_obj["position"].is_number_integer()) { - return Option(400, "Inclusion `position` must be an integer."); - } - } - } - - if(override_json.count("excludes") != 0) { - if(!override_json["excludes"].is_array()) { - return Option(400, "The `excludes` value must be an array."); - } - - for(const auto & exclude_obj: override_json["excludes"]) { - if(!exclude_obj.is_object()) { - return Option(400, "The `excludes` value must be an array of objects."); - } - - if(exclude_obj.count("id") == 0) { - return Option(400, "Exclusion definition must define an `id`."); - } - - if(!exclude_obj["id"].is_string()) { - return Option(400, "Exclusion `id` must be a string."); - } - } - - } - - if(override_json.count("filter_by") != 0) { - if(!override_json["filter_by"].is_string()) { - return Option(400, "The `filter_by` must be a string."); - } - - if(override_json["filter_by"].get().empty()) { - return Option(400, "The `filter_by` must be a non-empty string."); - } - } - - if(override_json.count("remove_matched_tokens") != 0) { - if (!override_json["remove_matched_tokens"].is_boolean()) { - return Option(400, "The `remove_matched_tokens` must be a boolean."); - } - } - - if(override_json.count("filter_curated_hits") != 0) { - if (!override_json["filter_curated_hits"].is_boolean()) { - return Option(400, "The `filter_curated_hits` must be a boolean."); - } - } - - if(override_json.count("stop_processing") != 0) { - if (!override_json["stop_processing"].is_boolean()) { - return Option(400, "The `stop_processing` must be a boolean."); - } - } - - if(!id.empty()) { - override.id = id; - } else if(override_json.count("id") != 0) { - override.id = override_json["id"].get(); - } else { - return Option(400, "Override `id` not provided."); - } - - override.rule.query = override_json["rule"]["query"].get(); - override.rule.match = override_json["rule"]["match"].get(); - - if (override_json.count("includes") != 0) { - for(const auto & include: override_json["includes"]) { - add_hit_t add_hit; - add_hit.doc_id = include["id"].get(); - add_hit.position = include["position"].get(); - override.add_hits.push_back(add_hit); - } - } - - if (override_json.count("excludes") != 0) { - for(const auto & exclude: override_json["excludes"]) { - drop_hit_t drop_hit; - drop_hit.doc_id = exclude["id"].get(); - override.drop_hits.push_back(drop_hit); - } - } - - if (override_json.count("filter_by") != 0) { - override.filter_by = override_json["filter_by"].get(); - } - - if (override_json.count("sort_by") != 0) { - override.sort_by = override_json["sort_by"].get(); - } - - if (override_json.count("replace_query") != 0) { - if(override_json.count("remove_matched_tokens") != 0) { - return Option(400, "Only one of `replace_query` or `remove_matched_tokens` can be specified."); - } - override.replace_query = override_json["replace_query"].get(); - } - - if(override_json.count("remove_matched_tokens") != 0) { - override.remove_matched_tokens = override_json["remove_matched_tokens"].get(); - } else { - override.remove_matched_tokens = (override_json.count("filter_by") != 0); - } - - if(override_json.count("filter_curated_hits") != 0) { - override.filter_curated_hits = override_json["filter_curated_hits"].get(); - } - - if(override_json.count("stop_processing") != 0) { - override.stop_processing = override_json["stop_processing"].get(); - } - - if(override_json.count("effective_from_ts") != 0) { - override.effective_from_ts = override_json["effective_from_ts"].get(); - } - - if(override_json.count("effective_to_ts") != 0) { - override.effective_to_ts = override_json["effective_to_ts"].get(); - } - - // we have to also detect if it is a dynamic query rule - size_t i = 0; - while(i < override.rule.query.size()) { - if(override.rule.query[i] == '{') { - // look for closing curly - i++; - while(i < override.rule.query.size()) { - if(override.rule.query[i] == '}') { - override.rule.dynamic_query = true; - // remove spaces around curlies - override.rule.query = StringUtils::trim_curly_spaces(override.rule.query); - break; - } - i++; - } - } - i++; - } - - return Option(true); - } - - nlohmann::json to_json() const { - nlohmann::json override; - override["id"] = id; - override["rule"]["query"] = rule.query; - override["rule"]["match"] = rule.match; - - override["includes"] = nlohmann::json::array(); - - for(const auto & add_hit: add_hits) { - nlohmann::json include; - include["id"] = add_hit.doc_id; - include["position"] = add_hit.position; - override["includes"].push_back(include); - } - - override["excludes"] = nlohmann::json::array(); - for(const auto & drop_hit: drop_hits) { - nlohmann::json exclude; - exclude["id"] = drop_hit.doc_id; - override["excludes"].push_back(exclude); - } - - if(!filter_by.empty()) { - override["filter_by"] = filter_by; - } - - if(!sort_by.empty()) { - override["sort_by"] = sort_by; - } - - if(!replace_query.empty()) { - override["replace_query"] = replace_query; - } - - if(effective_from_ts != -1) { - override["effective_from_ts"] = effective_from_ts; - } - - if(effective_to_ts != -1) { - override["effective_to_ts"] = effective_to_ts; - } - - override["remove_matched_tokens"] = remove_matched_tokens; - override["filter_curated_hits"] = filter_curated_hits; - override["stop_processing"] = stop_processing; - - return override; - } -}; - enum enable_t { always, fallback, diff --git a/include/override.h b/include/override.h new file mode 100644 index 00000000..1003d865 --- /dev/null +++ b/include/override.h @@ -0,0 +1,49 @@ +#pragma once +#include +#include +#include "option.h" + +struct override_t { + static const std::string MATCH_EXACT; + static const std::string MATCH_CONTAINS; + + struct rule_t { + std::string query; + std::string match; + bool dynamic_query = false; + }; + + struct add_hit_t { + std::string doc_id; + uint32_t position = 0; + }; + + struct drop_hit_t { + std::string doc_id; + }; + + std::string id; + + rule_t rule; + std::vector add_hits; + std::vector drop_hits; + + std::string filter_by; + bool remove_matched_tokens = false; + bool filter_curated_hits = false; + + bool stop_processing = true; + + std::string sort_by; + std::string replace_query; + + // epoch seconds + int64_t effective_from_ts = -1; + int64_t effective_to_ts = -1; + + override_t() = default; + + static Option parse(const nlohmann::json& override_json, const std::string& id, override_t& override); + + nlohmann::json to_json() const; +}; diff --git a/src/override.cpp b/src/override.cpp new file mode 100644 index 00000000..f85bb8e3 --- /dev/null +++ b/src/override.cpp @@ -0,0 +1,232 @@ +#include +#include "override.h" + +Option override_t::parse(const nlohmann::json& override_json, const std::string& id, override_t& override) { + if(!override_json.is_object()) { + return Option(400, "Bad JSON."); + } + + if(override_json.count("rule") == 0 || !override_json["rule"].is_object()) { + return Option(400, "Missing `rule` definition."); + } + + if(override_json["rule"].count("query") == 0 || override_json["rule"].count("match") == 0) { + return Option(400, "The `rule` definition must contain a `query` and `match`."); + } + + if(override_json.count("includes") == 0 && override_json.count("excludes") == 0 && + override_json.count("filter_by") == 0 && override_json.count("sort_by") == 0 && + override_json.count("remove_matched_tokens") == 0 && + override_json.count("replace_query") == 0) { + return Option(400, "Must contain one of: `includes`, `excludes`, " + "`filter_by`, `sort_by`, `remove_matched_tokens`, `replace_query`."); + } + + if(override_json.count("includes") != 0) { + if(!override_json["includes"].is_array()) { + return Option(400, "The `includes` value must be an array."); + } + + for(const auto & include_obj: override_json["includes"]) { + if(!include_obj.is_object()) { + return Option(400, "The `includes` value must be an array of objects."); + } + + if(include_obj.count("id") == 0 || include_obj.count("position") == 0) { + return Option(400, "Inclusion definition must define both `id` and `position` keys."); + } + + if(!include_obj["id"].is_string()) { + return Option(400, "Inclusion `id` must be a string."); + } + + if(!include_obj["position"].is_number_integer()) { + return Option(400, "Inclusion `position` must be an integer."); + } + } + } + + if(override_json.count("excludes") != 0) { + if(!override_json["excludes"].is_array()) { + return Option(400, "The `excludes` value must be an array."); + } + + for(const auto & exclude_obj: override_json["excludes"]) { + if(!exclude_obj.is_object()) { + return Option(400, "The `excludes` value must be an array of objects."); + } + + if(exclude_obj.count("id") == 0) { + return Option(400, "Exclusion definition must define an `id`."); + } + + if(!exclude_obj["id"].is_string()) { + return Option(400, "Exclusion `id` must be a string."); + } + } + + } + + if(override_json.count("filter_by") != 0) { + if(!override_json["filter_by"].is_string()) { + return Option(400, "The `filter_by` must be a string."); + } + + if(override_json["filter_by"].get().empty()) { + return Option(400, "The `filter_by` must be a non-empty string."); + } + } + + if(override_json.count("remove_matched_tokens") != 0) { + if (!override_json["remove_matched_tokens"].is_boolean()) { + return Option(400, "The `remove_matched_tokens` must be a boolean."); + } + } + + if(override_json.count("filter_curated_hits") != 0) { + if (!override_json["filter_curated_hits"].is_boolean()) { + return Option(400, "The `filter_curated_hits` must be a boolean."); + } + } + + if(override_json.count("stop_processing") != 0) { + if (!override_json["stop_processing"].is_boolean()) { + return Option(400, "The `stop_processing` must be a boolean."); + } + } + + if(!id.empty()) { + override.id = id; + } else if(override_json.count("id") != 0) { + override.id = override_json["id"].get(); + } else { + return Option(400, "Override `id` not provided."); + } + + override.rule.query = override_json["rule"]["query"].get(); + override.rule.match = override_json["rule"]["match"].get(); + + if (override_json.count("includes") != 0) { + for(const auto & include: override_json["includes"]) { + add_hit_t add_hit; + add_hit.doc_id = include["id"].get(); + add_hit.position = include["position"].get(); + override.add_hits.push_back(add_hit); + } + } + + if (override_json.count("excludes") != 0) { + for(const auto & exclude: override_json["excludes"]) { + drop_hit_t drop_hit; + drop_hit.doc_id = exclude["id"].get(); + override.drop_hits.push_back(drop_hit); + } + } + + if (override_json.count("filter_by") != 0) { + override.filter_by = override_json["filter_by"].get(); + } + + if (override_json.count("sort_by") != 0) { + override.sort_by = override_json["sort_by"].get(); + } + + if (override_json.count("replace_query") != 0) { + if(override_json.count("remove_matched_tokens") != 0 && override_json["remove_matched_tokens"].get()) { + return Option(400, "Only one of `replace_query` or `remove_matched_tokens` can be specified."); + } + override.replace_query = override_json["replace_query"].get(); + } + + if(override_json.count("remove_matched_tokens") != 0) { + override.remove_matched_tokens = override_json["remove_matched_tokens"].get(); + } else { + override.remove_matched_tokens = (override_json.count("filter_by") != 0); + } + + if(override_json.count("filter_curated_hits") != 0) { + override.filter_curated_hits = override_json["filter_curated_hits"].get(); + } + + if(override_json.count("stop_processing") != 0) { + override.stop_processing = override_json["stop_processing"].get(); + } + + if(override_json.count("effective_from_ts") != 0) { + override.effective_from_ts = override_json["effective_from_ts"].get(); + } + + if(override_json.count("effective_to_ts") != 0) { + override.effective_to_ts = override_json["effective_to_ts"].get(); + } + + // we have to also detect if it is a dynamic query rule + size_t i = 0; + while(i < override.rule.query.size()) { + if(override.rule.query[i] == '{') { + // look for closing curly + i++; + while(i < override.rule.query.size()) { + if(override.rule.query[i] == '}') { + override.rule.dynamic_query = true; + // remove spaces around curlies + override.rule.query = StringUtils::trim_curly_spaces(override.rule.query); + break; + } + i++; + } + } + i++; + } + + return Option(true); +} + +nlohmann::json override_t::to_json() const { + nlohmann::json override; + override["id"] = id; + override["rule"]["query"] = rule.query; + override["rule"]["match"] = rule.match; + + override["includes"] = nlohmann::json::array(); + + for(const auto & add_hit: add_hits) { + nlohmann::json include; + include["id"] = add_hit.doc_id; + include["position"] = add_hit.position; + override["includes"].push_back(include); + } + + override["excludes"] = nlohmann::json::array(); + for(const auto & drop_hit: drop_hits) { + nlohmann::json exclude; + exclude["id"] = drop_hit.doc_id; + override["excludes"].push_back(exclude); + } + + if(!filter_by.empty()) { + override["filter_by"] = filter_by; + } + + if(!sort_by.empty()) { + override["sort_by"] = sort_by; + } + + if(!replace_query.empty()) { + override["replace_query"] = replace_query; + } + + if(effective_from_ts != -1) { + override["effective_from_ts"] = effective_from_ts; + } + + if(effective_to_ts != -1) { + override["effective_to_ts"] = effective_to_ts; + } + + override["remove_matched_tokens"] = remove_matched_tokens; + override["filter_curated_hits"] = filter_curated_hits; + override["stop_processing"] = stop_processing; + + return override; +} diff --git a/test/collection_override_test.cpp b/test/collection_override_test.cpp index 7e1cef25..ea40ccb5 100644 --- a/test/collection_override_test.cpp +++ b/test/collection_override_test.cpp @@ -839,6 +839,11 @@ TEST_F(CollectionOverrideTest, ReplaceQuery) { op = override_t::parse(override_json, "rule-1", override_rule); ASSERT_FALSE(op.ok()); ASSERT_EQ("Only one of `replace_query` or `remove_matched_tokens` can be specified.", op.error()); + + // it's okay when it's explicitly set to false + override_json["remove_matched_tokens"] = false; + op = override_t::parse(override_json, "rule-1", override_rule); + ASSERT_TRUE(op.ok()); } TEST_F(CollectionOverrideTest, WindowForRule) {