This commit is contained in:
krunal1313 2023-07-03 15:41:31 +05:30
parent 2c0c108d0e
commit 009194e51a
3 changed files with 19 additions and 20 deletions

View File

@@ -391,7 +391,7 @@ public:
void parse_search_query(const std::string &query, std::vector<std::string>& q_include_tokens,
std::vector<std::vector<std::string>>& q_exclude_tokens,
std::vector<std::vector<std::string>>& q_phrases,
const std::string& locale, const bool already_segmented) const;
const std::string& locale, const bool already_segmented, const std::string& stopword) const;
// PUBLIC OPERATIONS

View File

@@ -1483,7 +1483,7 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
field_query_tokens.emplace_back(query_tokens_t{});
parse_search_query(query, q_include_tokens,
field_query_tokens[0].q_exclude_tokens, field_query_tokens[0].q_phrases, "",
false);
false, stopword);
for(size_t i = 0; i < q_include_tokens.size(); i++) {
auto& q_include_token = q_include_tokens[i];
field_query_tokens[0].q_include_tokens.emplace_back(i, q_include_token, (i == q_include_tokens.size() - 1),
@@ -1495,7 +1495,7 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
parse_search_query(query, q_include_tokens,
field_query_tokens[0].q_exclude_tokens,
field_query_tokens[0].q_phrases,
field_locale, pre_segmented_query);
field_locale, pre_segmented_query, stopword);
// process filter overrides first, before synonyms (order is important)
@@ -1516,20 +1516,6 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
}
}
nlohmann::json stopwords_list;
const auto &stopword_op = CollectionManager::get_instance().get_stopword(stopword, stopwords_list);
if (stopword_op.ok()) {
auto &include_tokens = field_query_tokens[0].q_include_tokens;
for (const auto &search_item: stopwords_list.items()) {
auto val = search_item.value().get<std::string>();
std::transform(val.begin(), val.end(), val.begin(), ::tolower);
include_tokens.erase(std::remove_if(include_tokens.begin(), include_tokens.end(),
[&](const auto& token) {
return token.value == val;
}), include_tokens.end());
}
}
for(size_t i = 1; i < search_fields.size(); i++) {
field_query_tokens.emplace_back(query_tokens_t{});
field_query_tokens[i] = field_query_tokens[0];
@@ -2435,12 +2421,19 @@ void Collection::process_filter_overrides(std::vector<const override_t*>& filter
void Collection::parse_search_query(const std::string &query, std::vector<std::string>& q_include_tokens,
std::vector<std::vector<std::string>>& q_exclude_tokens,
std::vector<std::vector<std::string>>& q_phrases,
const std::string& locale, const bool already_segmented) const {
const std::string& locale, const bool already_segmented, const std::string& stopword) const {
if(query == "*") {
q_exclude_tokens = {};
q_include_tokens = {query};
} else {
std::vector<std::string> tokens;
nlohmann::json stopwords_list;
if(!stopword.empty()) {
const auto &stopword_op = CollectionManager::get_instance().get_stopword(stopword, stopwords_list);
if (!stopword_op.ok()) {
LOG(ERROR) << "Error fetching stopword_list for stopword " << stopword << " "<<stopword_op.error();
}
}
if(already_segmented) {
StringUtils::split(query, tokens, " ");
@@ -2452,6 +2445,12 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s
Tokenizer(query, true, false, locale, custom_symbols, token_separators).tokenize(tokens);
}
for (const auto &search_item: stopwords_list.items()) {
auto val = search_item.value().get<std::string>();
std::transform(val.begin(), val.end(), val.begin(), ::tolower);
tokens.erase(std::remove(tokens.begin(), tokens.end(), val), tokens.end());
}
bool exclude_operator_prior = false;
bool phrase_search_op_prior = false;
std::vector<std::string> phrase;
@@ -2536,7 +2535,7 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s
if(q_include_tokens.empty()) {
// this can happen if the only query token is an exclusion token
q_include_tokens.emplace_back("*");
q_include_tokens.emplace_back("##hrhdh##");
}
}
}

View File

@@ -1037,7 +1037,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
}
Option<nlohmann::json> result_op = collection->search(raw_query, search_fields, simple_filter_query, facet_fields,
sort_fields, num_typos,
sort_fields, num_typos, stopwords_set,
per_page,
page,
token_order, prefixes, drop_tokens_threshold,