mirror of https://github.com/typesense/typesense.git
synced 2025-05-23 07:09:44 +08:00

refactor: move stopword filtering from Collection::search() into parse_search_query(), threading a stopword set name through the call chain

commit 009194e51a
parent 2c0c108d0e
@@ -391,7 +391,7 @@ public:
     void parse_search_query(const std::string &query, std::vector<std::string>& q_include_tokens,
                             std::vector<std::vector<std::string>>& q_exclude_tokens,
                             std::vector<std::vector<std::string>>& q_phrases,
-                            const std::string& locale, const bool already_segmented) const;
+                            const std::string& locale, const bool already_segmented, const std::string& stopword) const;

     // PUBLIC OPERATIONS

@@ -1483,7 +1483,7 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
         field_query_tokens.emplace_back(query_tokens_t{});
         parse_search_query(query, q_include_tokens,
                            field_query_tokens[0].q_exclude_tokens, field_query_tokens[0].q_phrases, "",
-                           false);
+                           false, stopword);
         for(size_t i = 0; i < q_include_tokens.size(); i++) {
             auto& q_include_token = q_include_tokens[i];
             field_query_tokens[0].q_include_tokens.emplace_back(i, q_include_token, (i == q_include_tokens.size() - 1),
@@ -1495,7 +1495,7 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
         parse_search_query(query, q_include_tokens,
                            field_query_tokens[0].q_exclude_tokens,
                            field_query_tokens[0].q_phrases,
-                           field_locale, pre_segmented_query);
+                           field_locale, pre_segmented_query, stopword);

         // process filter overrides first, before synonyms (order is important)

@@ -1516,20 +1516,6 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
             }
         }

-        nlohmann::json stopwords_list;
-        const auto &stopword_op = CollectionManager::get_instance().get_stopword(stopword, stopwords_list);
-        if (stopword_op.ok()) {
-            auto &include_tokens = field_query_tokens[0].q_include_tokens;
-            for (const auto &search_item: stopwords_list.items()) {
-                auto val = search_item.value().get<std::string>();
-                std::transform(val.begin(), val.end(), val.begin(), ::tolower);
-                include_tokens.erase(std::remove_if(include_tokens.begin(), include_tokens.end(),
-                                                    [&](const auto& token) {
-                                                        return token.value == val;
-                                                    }), include_tokens.end());
-            }
-        }
-
         for(size_t i = 1; i < search_fields.size(); i++) {
             field_query_tokens.emplace_back(query_tokens_t{});
             field_query_tokens[i] = field_query_tokens[0];
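Note: the block removed above is not dropped outright — equivalent stopword filtering reappears inside parse_search_query() in the hunks below, which is the "move" half of this refactor. The removed version pruned the already-built q_include_tokens structs by their .value field via the erase-remove_if idiom. A minimal, self-contained sketch of that idiom (token_t here is an illustrative stand-in, not the actual Typesense token type):

    #include <algorithm>
    #include <string>
    #include <vector>

    struct token_t { std::string value; };   // illustrative stand-in

    int main() {
        std::vector<token_t> include_tokens = {{"the"}, {"quick"}, {"fox"}};
        const std::string val = "the";       // a lower-cased stopword

        // std::remove_if shifts the tokens to keep toward the front and
        // returns the new logical end; erase drops the leftover tail.
        include_tokens.erase(std::remove_if(include_tokens.begin(), include_tokens.end(),
                                            [&](const token_t& token) {
                                                return token.value == val;
                                            }), include_tokens.end());
        // include_tokens now holds only "quick" and "fox".
        return 0;
    }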
@@ -2435,12 +2421,19 @@ void Collection::process_filter_overrides(std::vector<const override_t*>& filter
 void Collection::parse_search_query(const std::string &query, std::vector<std::string>& q_include_tokens,
                                     std::vector<std::vector<std::string>>& q_exclude_tokens,
                                     std::vector<std::vector<std::string>>& q_phrases,
-                                    const std::string& locale, const bool already_segmented) const {
+                                    const std::string& locale, const bool already_segmented, const std::string& stopword) const {
     if(query == "*") {
         q_exclude_tokens = {};
         q_include_tokens = {query};
     } else {
         std::vector<std::string> tokens;
+        nlohmann::json stopwords_list;
+        if(!stopword.empty()) {
+            const auto &stopword_op = CollectionManager::get_instance().get_stopword(stopword, stopwords_list);
+            if (!stopword_op.ok()) {
+                LOG(ERROR) << "Error fetching stopword_list for stopword " << stopword << " " << stopword_op.error();
+            }
+        }

         if(already_segmented) {
             StringUtils::split(query, tokens, " ");
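The new guard only consults CollectionManager when a stopword set name was actually passed, and on a failed lookup it logs and continues rather than failing the whole search. A hedged sketch of that ok()/error() result pattern — Option below is a simplified stand-in for Typesense's Option<T>, and get_stopword_stub is hypothetical:

    #include <iostream>
    #include <string>

    struct Option {                       // simplified stand-in, not the real Option<T>
        bool success;
        std::string msg;
        bool ok() const { return success; }
        std::string error() const { return msg; }
    };

    // Hypothetical lookup: succeeds only for a known set name.
    Option get_stopword_stub(const std::string& name) {
        if (name == "common_words") {
            return {true, ""};
        }
        return {false, "Stopword set `" + name + "` not found."};
    }

    int main() {
        const std::string stopword = "missing_set";
        if (!stopword.empty()) {
            const auto op = get_stopword_stub(stopword);
            if (!op.ok()) {
                // Mirrors the LOG(ERROR) branch above: report, but keep searching.
                std::cerr << "Error fetching stopword_list for stopword "
                          << stopword << " " << op.error() << '\n';
            }
        }
        return 0;
    }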
@@ -2452,6 +2445,12 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s
             Tokenizer(query, true, false, locale, custom_symbols, token_separators).tokenize(tokens);
         }

+        for (const auto &search_item: stopwords_list.items()) {
+            auto val = search_item.value().get<std::string>();
+            std::transform(val.begin(), val.end(), val.begin(), ::tolower);
+            tokens.erase(std::remove(tokens.begin(), tokens.end(), val), tokens.end());
+        }
+
         bool exclude_operator_prior = false;
         bool phrase_search_op_prior = false;
         std::vector<std::string> phrase;
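Inside parse_search_query() the filtering is simpler than the removed search()-side version: tokens is a plain std::vector<std::string>, so std::remove on values suffices where the old code needed remove_if with a lambda. A runnable sketch of this lower-case-then-erase pass (token values are illustrative):

    #include <algorithm>
    #include <cctype>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        std::vector<std::string> tokens    = {"the", "quick", "brown", "fox"};
        std::vector<std::string> stopwords = {"The", "A"};  // stored casing may vary

        for (auto val : stopwords) {         // taken by copy so it can be lower-cased
            // Lower-case the stopword first, mirroring the std::transform call above.
            std::transform(val.begin(), val.end(), val.begin(), ::tolower);
            // Erase-remove: drop every token equal to the stopword.
            tokens.erase(std::remove(tokens.begin(), tokens.end(), val), tokens.end());
        }

        for (const auto& t : tokens) {
            std::cout << t << ' ';           // prints: quick brown fox
        }
        std::cout << '\n';
        return 0;
    }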
@@ -2536,7 +2535,7 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s

         if(q_include_tokens.empty()) {
             // this can happen if the only query token is an exclusion token
-            q_include_tokens.emplace_back("*");
+            q_include_tokens.emplace_back("##hrhdh##");
         }
     }
 }
@@ -1037,7 +1037,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
     }

     Option<nlohmann::json> result_op = collection->search(raw_query, search_fields, simple_filter_query, facet_fields,
-                                                          sort_fields, num_typos,
+                                                          sort_fields, num_typos, stopwords_set,
                                                           per_page,
                                                           page,
                                                           token_order, prefixes, drop_tokens_threshold,