mirror of
https://github.com/typesense/typesense.git
synced 2025-05-18 04:32:38 +08:00
Support remove_matched_tokens=false when replace query is used.
This commit is contained in:
parent
dfa7191f32
commit
83ed38e3e6
272
include/index.h
272
include/index.h
@ -26,6 +26,7 @@
|
||||
#include <tsl/htrie_map.h>
|
||||
#include "id_list.h"
|
||||
#include "synonym_index.h"
|
||||
#include "override.h"
|
||||
|
||||
static constexpr size_t ARRAY_FACET_DIM = 4;
|
||||
using facet_map_t = spp::sparse_hash_map<uint32_t, facet_hash_values_t>;
|
||||
@ -78,277 +79,6 @@ struct query_tokens_t {
|
||||
std::vector<std::vector<std::string>> q_synonyms;
|
||||
};
|
||||
|
||||
struct override_t {
|
||||
static const std::string MATCH_EXACT;
|
||||
static const std::string MATCH_CONTAINS;
|
||||
|
||||
struct rule_t {
|
||||
std::string query;
|
||||
std::string match;
|
||||
bool dynamic_query = false;
|
||||
};
|
||||
|
||||
struct add_hit_t {
|
||||
std::string doc_id;
|
||||
uint32_t position = 0;
|
||||
};
|
||||
|
||||
struct drop_hit_t {
|
||||
std::string doc_id;
|
||||
};
|
||||
|
||||
std::string id;
|
||||
|
||||
rule_t rule;
|
||||
std::vector<add_hit_t> add_hits;
|
||||
std::vector<drop_hit_t> drop_hits;
|
||||
|
||||
std::string filter_by;
|
||||
bool remove_matched_tokens = false;
|
||||
bool filter_curated_hits = false;
|
||||
|
||||
bool stop_processing = true;
|
||||
|
||||
std::string sort_by;
|
||||
std::string replace_query;
|
||||
|
||||
// epoch seconds
|
||||
int64_t effective_from_ts = -1;
|
||||
int64_t effective_to_ts = -1;
|
||||
|
||||
override_t() = default;
|
||||
|
||||
static Option<bool> parse(const nlohmann::json& override_json, const std::string& id, override_t& override) {
|
||||
if(!override_json.is_object()) {
|
||||
return Option<bool>(400, "Bad JSON.");
|
||||
}
|
||||
|
||||
if(override_json.count("rule") == 0 || !override_json["rule"].is_object()) {
|
||||
return Option<bool>(400, "Missing `rule` definition.");
|
||||
}
|
||||
|
||||
if(override_json["rule"].count("query") == 0 || override_json["rule"].count("match") == 0) {
|
||||
return Option<bool>(400, "The `rule` definition must contain a `query` and `match`.");
|
||||
}
|
||||
|
||||
if(override_json.count("includes") == 0 && override_json.count("excludes") == 0 &&
|
||||
override_json.count("filter_by") == 0 && override_json.count("sort_by") == 0 &&
|
||||
override_json.count("remove_matched_tokens") == 0 &&
|
||||
override_json.count("replace_query") == 0) {
|
||||
return Option<bool>(400, "Must contain one of: `includes`, `excludes`, "
|
||||
"`filter_by`, `sort_by`, `remove_matched_tokens`, `replace_query`.");
|
||||
}
|
||||
|
||||
if(override_json.count("includes") != 0) {
|
||||
if(!override_json["includes"].is_array()) {
|
||||
return Option<bool>(400, "The `includes` value must be an array.");
|
||||
}
|
||||
|
||||
for(const auto & include_obj: override_json["includes"]) {
|
||||
if(!include_obj.is_object()) {
|
||||
return Option<bool>(400, "The `includes` value must be an array of objects.");
|
||||
}
|
||||
|
||||
if(include_obj.count("id") == 0 || include_obj.count("position") == 0) {
|
||||
return Option<bool>(400, "Inclusion definition must define both `id` and `position` keys.");
|
||||
}
|
||||
|
||||
if(!include_obj["id"].is_string()) {
|
||||
return Option<bool>(400, "Inclusion `id` must be a string.");
|
||||
}
|
||||
|
||||
if(!include_obj["position"].is_number_integer()) {
|
||||
return Option<bool>(400, "Inclusion `position` must be an integer.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(override_json.count("excludes") != 0) {
|
||||
if(!override_json["excludes"].is_array()) {
|
||||
return Option<bool>(400, "The `excludes` value must be an array.");
|
||||
}
|
||||
|
||||
for(const auto & exclude_obj: override_json["excludes"]) {
|
||||
if(!exclude_obj.is_object()) {
|
||||
return Option<bool>(400, "The `excludes` value must be an array of objects.");
|
||||
}
|
||||
|
||||
if(exclude_obj.count("id") == 0) {
|
||||
return Option<bool>(400, "Exclusion definition must define an `id`.");
|
||||
}
|
||||
|
||||
if(!exclude_obj["id"].is_string()) {
|
||||
return Option<bool>(400, "Exclusion `id` must be a string.");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if(override_json.count("filter_by") != 0) {
|
||||
if(!override_json["filter_by"].is_string()) {
|
||||
return Option<bool>(400, "The `filter_by` must be a string.");
|
||||
}
|
||||
|
||||
if(override_json["filter_by"].get<std::string>().empty()) {
|
||||
return Option<bool>(400, "The `filter_by` must be a non-empty string.");
|
||||
}
|
||||
}
|
||||
|
||||
if(override_json.count("remove_matched_tokens") != 0) {
|
||||
if (!override_json["remove_matched_tokens"].is_boolean()) {
|
||||
return Option<bool>(400, "The `remove_matched_tokens` must be a boolean.");
|
||||
}
|
||||
}
|
||||
|
||||
if(override_json.count("filter_curated_hits") != 0) {
|
||||
if (!override_json["filter_curated_hits"].is_boolean()) {
|
||||
return Option<bool>(400, "The `filter_curated_hits` must be a boolean.");
|
||||
}
|
||||
}
|
||||
|
||||
if(override_json.count("stop_processing") != 0) {
|
||||
if (!override_json["stop_processing"].is_boolean()) {
|
||||
return Option<bool>(400, "The `stop_processing` must be a boolean.");
|
||||
}
|
||||
}
|
||||
|
||||
if(!id.empty()) {
|
||||
override.id = id;
|
||||
} else if(override_json.count("id") != 0) {
|
||||
override.id = override_json["id"].get<std::string>();
|
||||
} else {
|
||||
return Option<bool>(400, "Override `id` not provided.");
|
||||
}
|
||||
|
||||
override.rule.query = override_json["rule"]["query"].get<std::string>();
|
||||
override.rule.match = override_json["rule"]["match"].get<std::string>();
|
||||
|
||||
if (override_json.count("includes") != 0) {
|
||||
for(const auto & include: override_json["includes"]) {
|
||||
add_hit_t add_hit;
|
||||
add_hit.doc_id = include["id"].get<std::string>();
|
||||
add_hit.position = include["position"].get<uint32_t>();
|
||||
override.add_hits.push_back(add_hit);
|
||||
}
|
||||
}
|
||||
|
||||
if (override_json.count("excludes") != 0) {
|
||||
for(const auto & exclude: override_json["excludes"]) {
|
||||
drop_hit_t drop_hit;
|
||||
drop_hit.doc_id = exclude["id"].get<std::string>();
|
||||
override.drop_hits.push_back(drop_hit);
|
||||
}
|
||||
}
|
||||
|
||||
if (override_json.count("filter_by") != 0) {
|
||||
override.filter_by = override_json["filter_by"].get<std::string>();
|
||||
}
|
||||
|
||||
if (override_json.count("sort_by") != 0) {
|
||||
override.sort_by = override_json["sort_by"].get<std::string>();
|
||||
}
|
||||
|
||||
if (override_json.count("replace_query") != 0) {
|
||||
if(override_json.count("remove_matched_tokens") != 0) {
|
||||
return Option<bool>(400, "Only one of `replace_query` or `remove_matched_tokens` can be specified.");
|
||||
}
|
||||
override.replace_query = override_json["replace_query"].get<std::string>();
|
||||
}
|
||||
|
||||
if(override_json.count("remove_matched_tokens") != 0) {
|
||||
override.remove_matched_tokens = override_json["remove_matched_tokens"].get<bool>();
|
||||
} else {
|
||||
override.remove_matched_tokens = (override_json.count("filter_by") != 0);
|
||||
}
|
||||
|
||||
if(override_json.count("filter_curated_hits") != 0) {
|
||||
override.filter_curated_hits = override_json["filter_curated_hits"].get<bool>();
|
||||
}
|
||||
|
||||
if(override_json.count("stop_processing") != 0) {
|
||||
override.stop_processing = override_json["stop_processing"].get<bool>();
|
||||
}
|
||||
|
||||
if(override_json.count("effective_from_ts") != 0) {
|
||||
override.effective_from_ts = override_json["effective_from_ts"].get<int64_t>();
|
||||
}
|
||||
|
||||
if(override_json.count("effective_to_ts") != 0) {
|
||||
override.effective_to_ts = override_json["effective_to_ts"].get<int64_t>();
|
||||
}
|
||||
|
||||
// we have to also detect if it is a dynamic query rule
|
||||
size_t i = 0;
|
||||
while(i < override.rule.query.size()) {
|
||||
if(override.rule.query[i] == '{') {
|
||||
// look for closing curly
|
||||
i++;
|
||||
while(i < override.rule.query.size()) {
|
||||
if(override.rule.query[i] == '}') {
|
||||
override.rule.dynamic_query = true;
|
||||
// remove spaces around curlies
|
||||
override.rule.query = StringUtils::trim_curly_spaces(override.rule.query);
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
nlohmann::json to_json() const {
|
||||
nlohmann::json override;
|
||||
override["id"] = id;
|
||||
override["rule"]["query"] = rule.query;
|
||||
override["rule"]["match"] = rule.match;
|
||||
|
||||
override["includes"] = nlohmann::json::array();
|
||||
|
||||
for(const auto & add_hit: add_hits) {
|
||||
nlohmann::json include;
|
||||
include["id"] = add_hit.doc_id;
|
||||
include["position"] = add_hit.position;
|
||||
override["includes"].push_back(include);
|
||||
}
|
||||
|
||||
override["excludes"] = nlohmann::json::array();
|
||||
for(const auto & drop_hit: drop_hits) {
|
||||
nlohmann::json exclude;
|
||||
exclude["id"] = drop_hit.doc_id;
|
||||
override["excludes"].push_back(exclude);
|
||||
}
|
||||
|
||||
if(!filter_by.empty()) {
|
||||
override["filter_by"] = filter_by;
|
||||
}
|
||||
|
||||
if(!sort_by.empty()) {
|
||||
override["sort_by"] = sort_by;
|
||||
}
|
||||
|
||||
if(!replace_query.empty()) {
|
||||
override["replace_query"] = replace_query;
|
||||
}
|
||||
|
||||
if(effective_from_ts != -1) {
|
||||
override["effective_from_ts"] = effective_from_ts;
|
||||
}
|
||||
|
||||
if(effective_to_ts != -1) {
|
||||
override["effective_to_ts"] = effective_to_ts;
|
||||
}
|
||||
|
||||
override["remove_matched_tokens"] = remove_matched_tokens;
|
||||
override["filter_curated_hits"] = filter_curated_hits;
|
||||
override["stop_processing"] = stop_processing;
|
||||
|
||||
return override;
|
||||
}
|
||||
};
|
||||
|
||||
enum enable_t {
|
||||
always,
|
||||
fallback,
|
||||
|
49
include/override.h
Normal file
49
include/override.h
Normal file
@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <json.hpp>
|
||||
#include "option.h"
|
||||
|
||||
struct override_t {
|
||||
static const std::string MATCH_EXACT;
|
||||
static const std::string MATCH_CONTAINS;
|
||||
|
||||
struct rule_t {
|
||||
std::string query;
|
||||
std::string match;
|
||||
bool dynamic_query = false;
|
||||
};
|
||||
|
||||
struct add_hit_t {
|
||||
std::string doc_id;
|
||||
uint32_t position = 0;
|
||||
};
|
||||
|
||||
struct drop_hit_t {
|
||||
std::string doc_id;
|
||||
};
|
||||
|
||||
std::string id;
|
||||
|
||||
rule_t rule;
|
||||
std::vector<add_hit_t> add_hits;
|
||||
std::vector<drop_hit_t> drop_hits;
|
||||
|
||||
std::string filter_by;
|
||||
bool remove_matched_tokens = false;
|
||||
bool filter_curated_hits = false;
|
||||
|
||||
bool stop_processing = true;
|
||||
|
||||
std::string sort_by;
|
||||
std::string replace_query;
|
||||
|
||||
// epoch seconds
|
||||
int64_t effective_from_ts = -1;
|
||||
int64_t effective_to_ts = -1;
|
||||
|
||||
override_t() = default;
|
||||
|
||||
static Option<bool> parse(const nlohmann::json& override_json, const std::string& id, override_t& override);
|
||||
|
||||
nlohmann::json to_json() const;
|
||||
};
|
232
src/override.cpp
Normal file
232
src/override.cpp
Normal file
@ -0,0 +1,232 @@
|
||||
#include <string_utils.h>
|
||||
#include "override.h"
|
||||
|
||||
/**
 * Validates `override_json` and, only when it is valid, populates `override`.
 *
 * Every type that is later read with `get<T>()` is checked first, so malformed
 * input yields a 400 result rather than an exception from the JSON library.
 *
 * @param override_json  JSON object describing the override.
 * @param id             Identifier to use; when empty, the `id` key of the JSON is used instead.
 * @param override       Output object. Nothing is written to it unless validation succeeds.
 *                       `add_hits` / `drop_hits` are appended to, not cleared.
 * @return `Option<bool>(true)` on success, otherwise an `Option<bool>` carrying
 *         code 400 and a message describing the first problem found.
 */
Option<bool> override_t::parse(const nlohmann::json& override_json, const std::string& id, override_t& override) {
    if(!override_json.is_object()) {
        return Option<bool>(400, "Bad JSON.");
    }

    // true when the top-level object carries `key`
    const auto has = [&override_json](const char* key) {
        return override_json.count(key) != 0;
    };

    if(!has("rule") || !override_json["rule"].is_object()) {
        return Option<bool>(400, "Missing `rule` definition.");
    }

    const nlohmann::json& rule_json = override_json["rule"];

    if(rule_json.count("query") == 0 || rule_json.count("match") == 0) {
        return Option<bool>(400, "The `rule` definition must contain a `query` and `match`.");
    }

    if(!has("includes") && !has("excludes") && !has("filter_by") && !has("sort_by") &&
       !has("remove_matched_tokens") && !has("replace_query")) {
        return Option<bool>(400, "Must contain one of: `includes`, `excludes`, "
                                 "`filter_by`, `sort_by`, `remove_matched_tokens`, `replace_query`.");
    }

    // `get<std::string>()` throws on a non-string value, so reject it with a 400 instead
    if(!rule_json["query"].is_string() || !rule_json["match"].is_string()) {
        return Option<bool>(400, "The `rule` definition must contain a string `query` and `match`.");
    }

    if(has("includes")) {
        if(!override_json["includes"].is_array()) {
            return Option<bool>(400, "The `includes` value must be an array.");
        }

        for(const auto& include_obj: override_json["includes"]) {
            if(!include_obj.is_object()) {
                return Option<bool>(400, "The `includes` value must be an array of objects.");
            }

            if(include_obj.count("id") == 0 || include_obj.count("position") == 0) {
                return Option<bool>(400, "Inclusion definition must define both `id` and `position` keys.");
            }

            if(!include_obj["id"].is_string()) {
                return Option<bool>(400, "Inclusion `id` must be a string.");
            }

            if(!include_obj["position"].is_number_integer()) {
                return Option<bool>(400, "Inclusion `position` must be an integer.");
            }
        }
    }

    if(has("excludes")) {
        if(!override_json["excludes"].is_array()) {
            return Option<bool>(400, "The `excludes` value must be an array.");
        }

        for(const auto& exclude_obj: override_json["excludes"]) {
            if(!exclude_obj.is_object()) {
                return Option<bool>(400, "The `excludes` value must be an array of objects.");
            }

            if(exclude_obj.count("id") == 0) {
                return Option<bool>(400, "Exclusion definition must define an `id`.");
            }

            if(!exclude_obj["id"].is_string()) {
                return Option<bool>(400, "Exclusion `id` must be a string.");
            }
        }
    }

    if(has("filter_by")) {
        if(!override_json["filter_by"].is_string()) {
            return Option<bool>(400, "The `filter_by` must be a string.");
        }

        if(override_json["filter_by"].get<std::string>().empty()) {
            return Option<bool>(400, "The `filter_by` must be a non-empty string.");
        }
    }

    if(has("sort_by") && !override_json["sort_by"].is_string()) {
        return Option<bool>(400, "The `sort_by` must be a string.");
    }

    if(has("replace_query") && !override_json["replace_query"].is_string()) {
        return Option<bool>(400, "The `replace_query` must be a string.");
    }

    if(has("remove_matched_tokens") && !override_json["remove_matched_tokens"].is_boolean()) {
        return Option<bool>(400, "The `remove_matched_tokens` must be a boolean.");
    }

    if(has("filter_curated_hits") && !override_json["filter_curated_hits"].is_boolean()) {
        return Option<bool>(400, "The `filter_curated_hits` must be a boolean.");
    }

    if(has("stop_processing") && !override_json["stop_processing"].is_boolean()) {
        return Option<bool>(400, "The `stop_processing` must be a boolean.");
    }

    // `get<int64_t>()` accepts any JSON number and throws on everything else
    if(has("effective_from_ts") && !override_json["effective_from_ts"].is_number()) {
        return Option<bool>(400, "The `effective_from_ts` must be a number.");
    }

    if(has("effective_to_ts") && !override_json["effective_to_ts"].is_number()) {
        return Option<bool>(400, "The `effective_to_ts` must be a number.");
    }

    // the explicit `id` argument wins over the one embedded in the JSON
    if(id.empty()) {
        if(!has("id")) {
            return Option<bool>(400, "Override `id` not provided.");
        }

        if(!override_json["id"].is_string()) {
            return Option<bool>(400, "Override `id` must be a string.");
        }
    }

    // `replace_query` only conflicts with `remove_matched_tokens` when the latter is
    // actually enabled; an explicit `false` is allowed
    if(has("replace_query") && has("remove_matched_tokens") &&
       override_json["remove_matched_tokens"].get<bool>()) {
        return Option<bool>(400, "Only one of `replace_query` or `remove_matched_tokens` can be specified.");
    }

    // ---- validation finished: from here on we only populate `override` ----

    override.id = id.empty() ? override_json["id"].get<std::string>() : id;

    override.rule.query = rule_json["query"].get<std::string>();
    override.rule.match = rule_json["match"].get<std::string>();

    if(has("includes")) {
        for(const auto& include: override_json["includes"]) {
            add_hit_t add_hit;
            add_hit.doc_id = include["id"].get<std::string>();
            add_hit.position = include["position"].get<uint32_t>();
            override.add_hits.push_back(add_hit);
        }
    }

    if(has("excludes")) {
        for(const auto& exclude: override_json["excludes"]) {
            drop_hit_t drop_hit;
            drop_hit.doc_id = exclude["id"].get<std::string>();
            override.drop_hits.push_back(drop_hit);
        }
    }

    if(has("filter_by")) {
        override.filter_by = override_json["filter_by"].get<std::string>();
    }

    if(has("sort_by")) {
        override.sort_by = override_json["sort_by"].get<std::string>();
    }

    if(has("replace_query")) {
        override.replace_query = override_json["replace_query"].get<std::string>();
    }

    // when not given explicitly, matched tokens are removed only if a `filter_by` is present
    override.remove_matched_tokens = has("remove_matched_tokens")
                                     ? override_json["remove_matched_tokens"].get<bool>()
                                     : has("filter_by");

    if(has("filter_curated_hits")) {
        override.filter_curated_hits = override_json["filter_curated_hits"].get<bool>();
    }

    if(has("stop_processing")) {
        override.stop_processing = override_json["stop_processing"].get<bool>();
    }

    if(has("effective_from_ts")) {
        override.effective_from_ts = override_json["effective_from_ts"].get<int64_t>();
    }

    if(has("effective_to_ts")) {
        override.effective_to_ts = override_json["effective_to_ts"].get<int64_t>();
    }

    // we have to also detect if it is a dynamic query rule
    size_t i = 0;
    while(i < override.rule.query.size()) {
        if(override.rule.query[i] == '{') {
            // look for closing curly
            i++;
            while(i < override.rule.query.size()) {
                if(override.rule.query[i] == '}') {
                    override.rule.dynamic_query = true;
                    // remove spaces around curlies
                    override.rule.query = StringUtils::trim_curly_spaces(override.rule.query);
                    break;
                }
                i++;
            }
        }
        i++;
    }

    return Option<bool>(true);
}
|
||||
|
||||
/**
 * Builds the JSON form of this override.
 *
 * Always present: `id`, `rule.query`, `rule.match`, the `includes` and
 * `excludes` arrays (possibly empty) and the three boolean flags.
 * Present only when set: `filter_by`, `sort_by`, `replace_query` (non-empty)
 * and `effective_from_ts` / `effective_to_ts` (different from -1).
 */
nlohmann::json override_t::to_json() const {
    nlohmann::json result;

    result["id"] = id;
    result["rule"]["query"] = rule.query;
    result["rule"]["match"] = rule.match;

    // collect the included hits as {id, position} objects
    nlohmann::json includes = nlohmann::json::array();
    for(const add_hit_t& hit: add_hits) {
        nlohmann::json entry;
        entry["id"] = hit.doc_id;
        entry["position"] = hit.position;
        includes.push_back(entry);
    }
    result["includes"] = includes;

    // collect the excluded hits as {id} objects
    nlohmann::json excludes = nlohmann::json::array();
    for(const drop_hit_t& hit: drop_hits) {
        nlohmann::json entry;
        entry["id"] = hit.doc_id;
        excludes.push_back(entry);
    }
    result["excludes"] = excludes;

    // optional string fields are skipped while empty
    if(!filter_by.empty()) {
        result["filter_by"] = filter_by;
    }
    if(!sort_by.empty()) {
        result["sort_by"] = sort_by;
    }
    if(!replace_query.empty()) {
        result["replace_query"] = replace_query;
    }

    // -1 marks an unset timestamp
    if(effective_from_ts != -1) {
        result["effective_from_ts"] = effective_from_ts;
    }
    if(effective_to_ts != -1) {
        result["effective_to_ts"] = effective_to_ts;
    }

    result["remove_matched_tokens"] = remove_matched_tokens;
    result["filter_curated_hits"] = filter_curated_hits;
    result["stop_processing"] = stop_processing;

    return result;
}
|
@ -839,6 +839,11 @@ TEST_F(CollectionOverrideTest, ReplaceQuery) {
|
||||
op = override_t::parse(override_json, "rule-1", override_rule);
|
||||
ASSERT_FALSE(op.ok());
|
||||
ASSERT_EQ("Only one of `replace_query` or `remove_matched_tokens` can be specified.", op.error());
|
||||
|
||||
// it's okay when it's explicitly set to false
|
||||
override_json["remove_matched_tokens"] = false;
|
||||
op = override_t::parse(override_json, "rule-1", override_rule);
|
||||
ASSERT_TRUE(op.ok());
|
||||
}
|
||||
|
||||
TEST_F(CollectionOverrideTest, WindowForRule) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user