mirror of
https://github.com/typesense/typesense.git
synced 2025-05-21 22:33:27 +08:00
fixing GET response with locale field and validation for empty stopword array
This commit is contained in:
parent
78352c0e23
commit
dee68a4d84
@ -7,9 +7,14 @@
|
||||
#include "mutex"
|
||||
#include "store.h"
|
||||
|
||||
struct stopword_struct_t {
|
||||
spp::sparse_hash_set<std::string> stopwords;
|
||||
std::string locale;
|
||||
};
|
||||
|
||||
class StopwordsManager{
|
||||
private:
|
||||
spp::sparse_hash_map<std::string, spp::sparse_hash_set<std::string>> stopword_configs;
|
||||
spp::sparse_hash_map<std::string, stopword_struct_t> stopword_configs;
|
||||
|
||||
static std::string get_stopword_key(const std::string & stopword_name);
|
||||
|
||||
@ -32,9 +37,9 @@ public:
|
||||
|
||||
void init(Store* store);
|
||||
|
||||
spp::sparse_hash_map<std::string, spp::sparse_hash_set<std::string>> get_stopwords() const;
|
||||
spp::sparse_hash_map<std::string, stopword_struct_t> get_stopwords() const;
|
||||
|
||||
Option<bool> get_stopword(const std::string&, spp::sparse_hash_set<std::string>&) const;
|
||||
Option<bool> get_stopword(const std::string&, stopword_struct_t&) const;
|
||||
|
||||
Option<bool> upsert_stopword(const std::string&, const nlohmann::json&, bool write_to_store=false);
|
||||
|
||||
|
@ -3202,9 +3202,9 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s
|
||||
q_include_tokens = {query};
|
||||
} else {
|
||||
std::vector<std::string> tokens;
|
||||
spp::sparse_hash_set<std::string> stopwords_list;
|
||||
stopword_struct_t stopwordStruct;
|
||||
if(!stopwords_set.empty()) {
|
||||
const auto &stopword_op = StopwordsManager::get_instance().get_stopword(stopwords_set, stopwords_list);
|
||||
const auto &stopword_op = StopwordsManager::get_instance().get_stopword(stopwords_set, stopwordStruct);
|
||||
if (!stopword_op.ok()) {
|
||||
LOG(ERROR) << stopword_op.error();
|
||||
LOG(ERROR) << "Error fetching stopword_list for stopword " << stopwords_set;
|
||||
@ -3221,7 +3221,7 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s
|
||||
Tokenizer(query, true, false, locale, custom_symbols, token_separators).tokenize(tokens);
|
||||
}
|
||||
|
||||
for (const auto val: stopwords_list) {
|
||||
for (const auto val: stopwordStruct.stopwords) {
|
||||
tokens.erase(std::remove(tokens.begin(), tokens.end(), val), tokens.end());
|
||||
}
|
||||
|
||||
|
@ -2172,14 +2172,19 @@ bool del_preset(const std::shared_ptr<http_req>& req, const std::shared_ptr<http
|
||||
|
||||
bool get_stopwords(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
|
||||
StopwordsManager& stopwordManager = StopwordsManager::get_instance();
|
||||
const spp::sparse_hash_map<std::string, spp::sparse_hash_set<std::string>>& stopwords = stopwordManager.get_stopwords();
|
||||
const spp::sparse_hash_map<std::string, stopword_struct_t>& stopwords = stopwordManager.get_stopwords();
|
||||
nlohmann::json res_json = nlohmann::json::object();
|
||||
res_json["stopwords"] = nlohmann::json::array();
|
||||
|
||||
for(const auto& stopwords_kv: stopwords) {
|
||||
nlohmann::json stopword;
|
||||
stopword["id"] = stopwords_kv.first;
|
||||
for(const auto& val : stopwords_kv.second) {
|
||||
|
||||
if(!stopwords_kv.second.locale.empty()) {
|
||||
stopword["locale"] = stopwords_kv.second.locale;
|
||||
}
|
||||
|
||||
for(const auto& val : stopwords_kv.second.stopwords) {
|
||||
stopword["stopwords"].push_back(val);
|
||||
}
|
||||
res_json["stopwords"].push_back(stopword);
|
||||
@ -2193,8 +2198,8 @@ bool get_stopword(const std::shared_ptr<http_req>& req, const std::shared_ptr<ht
|
||||
const std::string & stopword_name = req->params["name"];
|
||||
StopwordsManager& stopwordManager = StopwordsManager::get_instance();
|
||||
|
||||
spp::sparse_hash_set<std::string> stopwords;
|
||||
Option<bool> stopword_op = stopwordManager.get_stopword(stopword_name, stopwords);
|
||||
stopword_struct_t stopwords_struct;
|
||||
Option<bool> stopword_op = stopwordManager.get_stopword(stopword_name, stopwords_struct);
|
||||
|
||||
if(!stopword_op.ok()) {
|
||||
res->set(stopword_op.code(), stopword_op.error());
|
||||
@ -2203,7 +2208,11 @@ bool get_stopword(const std::shared_ptr<http_req>& req, const std::shared_ptr<ht
|
||||
|
||||
nlohmann::json res_json;
|
||||
res_json["id"] = stopword_name;
|
||||
for(const auto& stopword : stopwords) {
|
||||
if(!stopwords_struct.locale.empty()) {
|
||||
res_json["locale"] = stopwords_struct.locale;
|
||||
}
|
||||
|
||||
for(const auto& stopword : stopwords_struct.stopwords) {
|
||||
res_json["stopwords"].push_back(stopword);
|
||||
}
|
||||
|
||||
@ -2231,7 +2240,7 @@ bool put_upsert_stopword(const std::shared_ptr<http_req>& req, const std::shared
|
||||
return false;
|
||||
}
|
||||
|
||||
req_json["name"] = stopword_name;
|
||||
req_json["id"] = stopword_name;
|
||||
|
||||
res->set_200(req_json.dump());
|
||||
return true;
|
||||
@ -2241,8 +2250,8 @@ bool del_stopword(const std::shared_ptr<http_req>& req, const std::shared_ptr<ht
|
||||
const std::string & stopword_name = req->params["name"];
|
||||
StopwordsManager& stopwordManager = StopwordsManager::get_instance();
|
||||
|
||||
spp::sparse_hash_set<std::string> stopwords;
|
||||
Option<bool> stopword_op = stopwordManager.get_stopword(stopword_name, stopwords);
|
||||
stopword_struct_t stopwords_struct;
|
||||
Option<bool> stopword_op = stopwordManager.get_stopword(stopword_name, stopwords_struct);
|
||||
if(!stopword_op.ok()) {
|
||||
res->set(stopword_op.code(), stopword_op.error());
|
||||
return false;
|
||||
@ -2257,7 +2266,11 @@ bool del_stopword(const std::shared_ptr<http_req>& req, const std::shared_ptr<ht
|
||||
|
||||
nlohmann::json res_json;
|
||||
res_json["id"] = stopword_name;
|
||||
for(const auto& stopword : stopwords) {
|
||||
if(!stopwords_struct.locale.empty()) {
|
||||
res_json["locale"] = stopwords_struct.locale;
|
||||
}
|
||||
|
||||
for(const auto& stopword : stopwords_struct.stopwords) {
|
||||
res_json["stopwords"].push_back(stopword);
|
||||
}
|
||||
|
||||
|
@ -5,17 +5,17 @@ void StopwordsManager::init(Store* _store) {
|
||||
store = _store;
|
||||
}
|
||||
|
||||
spp::sparse_hash_map<std::string, spp::sparse_hash_set<std::string>> StopwordsManager::get_stopwords() const {
|
||||
spp::sparse_hash_map<std::string, stopword_struct_t> StopwordsManager::get_stopwords() const {
|
||||
std::shared_lock lock(mutex);
|
||||
return stopword_configs;
|
||||
}
|
||||
|
||||
Option<bool> StopwordsManager::get_stopword(const std::string& stopword_name, spp::sparse_hash_set<std::string>& stopwords) const {
|
||||
Option<bool> StopwordsManager::get_stopword(const std::string& stopword_name, stopword_struct_t& stopwords_struct) const {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
const auto& it = stopword_configs.find(stopword_name);
|
||||
if(it != stopword_configs.end()) {
|
||||
stopwords = it->second;
|
||||
stopwords_struct = it->second;
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
@ -34,6 +34,10 @@ Option<bool> StopwordsManager::upsert_stopword(const std::string& stopword_name,
|
||||
return Option<bool>(400, (std::string("Parameter `") + STOPWORD_VALUES + "` is required"));
|
||||
}
|
||||
|
||||
if(stopwords_json[STOPWORD_VALUES].empty()) {
|
||||
return Option<bool>(400, (std::string("Parameter `") + STOPWORD_VALUES + "` is empty"));
|
||||
}
|
||||
|
||||
if((!stopwords_json[STOPWORD_VALUES].is_array()) || (!stopwords_json[STOPWORD_VALUES][0].is_string())) {
|
||||
return Option<bool>(400, (std::string("Parameter `") + STOPWORD_VALUES + "` is required as string array value"));
|
||||
}
|
||||
@ -65,7 +69,7 @@ Option<bool> StopwordsManager::upsert_stopword(const std::string& stopword_name,
|
||||
}
|
||||
tokens.clear();
|
||||
}
|
||||
stopword_configs[stopword_name] = stopwords_set;
|
||||
stopword_configs[stopword_name] = stopword_struct_t{stopwords_set, locale};
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
|
@ -56,21 +56,21 @@ TEST_F(StopwordsManagerTest, UpsertGetStopwords) {
|
||||
ASSERT_TRUE(stopword_config.find("articles") != stopword_config.end());
|
||||
ASSERT_TRUE(stopword_config.find("continents") != stopword_config.end());
|
||||
|
||||
ASSERT_EQ(3, stopword_config["articles"].size());
|
||||
ASSERT_TRUE(stopword_config["articles"].find("a") != stopword_config["articles"].end());
|
||||
ASSERT_TRUE(stopword_config["articles"].find("an") != stopword_config["articles"].end());
|
||||
ASSERT_TRUE(stopword_config["articles"].find("the") != stopword_config["articles"].end());
|
||||
ASSERT_EQ(3, stopword_config["articles"].stopwords.size());
|
||||
ASSERT_TRUE(stopword_config["articles"].stopwords.find("a") != stopword_config["articles"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["articles"].stopwords.find("an") != stopword_config["articles"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["articles"].stopwords.find("the") != stopword_config["articles"].stopwords.end());
|
||||
|
||||
ASSERT_EQ(2, stopword_config["continents"].size());
|
||||
ASSERT_TRUE(stopword_config["continents"].find("america") != stopword_config["continents"].end());
|
||||
ASSERT_TRUE(stopword_config["continents"].find("europe") != stopword_config["continents"].end());
|
||||
ASSERT_EQ(2, stopword_config["continents"].stopwords.size());
|
||||
ASSERT_TRUE(stopword_config["continents"].stopwords.find("america") != stopword_config["continents"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["continents"].stopwords.find("europe") != stopword_config["continents"].stopwords.end());
|
||||
|
||||
ASSERT_EQ(5, stopword_config["countries"].size()); //with tokenization United States will be splited into two
|
||||
ASSERT_TRUE(stopword_config["countries"].find("india") != stopword_config["countries"].end());
|
||||
ASSERT_TRUE(stopword_config["countries"].find("united") != stopword_config["countries"].end());
|
||||
ASSERT_TRUE(stopword_config["countries"].find("states") != stopword_config["countries"].end());
|
||||
ASSERT_TRUE(stopword_config["countries"].find("china") != stopword_config["countries"].end());
|
||||
ASSERT_TRUE(stopword_config["countries"].find("japan") != stopword_config["countries"].end());
|
||||
ASSERT_EQ(5, stopword_config["countries"].stopwords.size()); //with tokenization United States will be splited into two
|
||||
ASSERT_TRUE(stopword_config["countries"].stopwords.find("india") != stopword_config["countries"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["countries"].stopwords.find("united") != stopword_config["countries"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["countries"].stopwords.find("states") != stopword_config["countries"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["countries"].stopwords.find("china") != stopword_config["countries"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["countries"].stopwords.find("japan") != stopword_config["countries"].stopwords.end());
|
||||
}
|
||||
|
||||
TEST_F(StopwordsManagerTest, GetStopword) {
|
||||
@ -79,16 +79,14 @@ TEST_F(StopwordsManagerTest, GetStopword) {
|
||||
auto upsert_op = stopwordsManager.upsert_stopword("articles", stopwords);
|
||||
ASSERT_TRUE(upsert_op.ok());
|
||||
|
||||
spp::sparse_hash_set<std::string> stopwords_set;
|
||||
stopword_struct_t stopwordStruct;
|
||||
|
||||
auto get_op = stopwordsManager.get_stopword("articles", stopwords_set);
|
||||
auto get_op = stopwordsManager.get_stopword("articles", stopwordStruct);
|
||||
ASSERT_TRUE(get_op.ok());
|
||||
ASSERT_EQ(3, stopwords_set.size());
|
||||
|
||||
stopwords_set.clear();
|
||||
ASSERT_EQ(3, stopwordStruct.stopwords.size());
|
||||
|
||||
//try to fetch non-existing stopword
|
||||
get_op = stopwordsManager.get_stopword("country", stopwords_set);
|
||||
get_op = stopwordsManager.get_stopword("country", stopwordStruct);
|
||||
ASSERT_FALSE(get_op.ok());
|
||||
ASSERT_EQ(404, get_op.code());
|
||||
ASSERT_EQ("Stopword `country` not found.", get_op.error());
|
||||
@ -99,9 +97,9 @@ TEST_F(StopwordsManagerTest, GetStopword) {
|
||||
upsert_op = stopwordsManager.upsert_stopword("country", stopwords);
|
||||
ASSERT_TRUE(upsert_op.ok());
|
||||
|
||||
get_op = stopwordsManager.get_stopword("country", stopwords_set);
|
||||
get_op = stopwordsManager.get_stopword("country", stopwordStruct);
|
||||
ASSERT_TRUE(get_op.ok());
|
||||
ASSERT_EQ(4, stopwords_set.size()); //as United States will be tokenized and counted 2 stopwords
|
||||
ASSERT_EQ(4, stopwordStruct.stopwords.size()); //as United States will be tokenized and counted 2 stopwords
|
||||
}
|
||||
|
||||
TEST_F(StopwordsManagerTest, DeleteStopword) {
|
||||
@ -119,13 +117,13 @@ TEST_F(StopwordsManagerTest, DeleteStopword) {
|
||||
upsert_op = stopwordsManager.upsert_stopword("articles", stopwords2);
|
||||
ASSERT_TRUE(upsert_op.ok());
|
||||
|
||||
spp::sparse_hash_set<std::string> stopwords_set;
|
||||
stopword_struct_t stopwordStruct;
|
||||
|
||||
//delete a stopword
|
||||
auto del_op = stopwordsManager.delete_stopword("articles");
|
||||
ASSERT_TRUE(del_op.ok());
|
||||
|
||||
auto get_op = stopwordsManager.get_stopword("articles", stopwords_set);
|
||||
auto get_op = stopwordsManager.get_stopword("articles", stopwordStruct);
|
||||
ASSERT_FALSE(get_op.ok());
|
||||
ASSERT_EQ(404, get_op.code());
|
||||
ASSERT_EQ("Stopword `articles` not found.", get_op.error());
|
||||
@ -147,9 +145,9 @@ TEST_F(StopwordsManagerTest, UpdateStopword) {
|
||||
|
||||
auto stopword_config = stopwordsManager.get_stopwords();
|
||||
|
||||
ASSERT_EQ(2, stopword_config["continents"].size());
|
||||
ASSERT_TRUE(stopword_config["continents"].find("america") != stopword_config["continents"].end());
|
||||
ASSERT_TRUE(stopword_config["continents"].find("europe") != stopword_config["continents"].end());
|
||||
ASSERT_EQ(2, stopword_config["continents"].stopwords.size());
|
||||
ASSERT_TRUE(stopword_config["continents"].stopwords.find("america") != stopword_config["continents"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["continents"].stopwords.find("europe") != stopword_config["continents"].stopwords.end());
|
||||
|
||||
//adding new words with same name should replace the stopwords set
|
||||
stopwords_json = R"(
|
||||
@ -160,10 +158,10 @@ TEST_F(StopwordsManagerTest, UpdateStopword) {
|
||||
|
||||
stopword_config = stopwordsManager.get_stopwords();
|
||||
|
||||
ASSERT_EQ(3, stopword_config["continents"].size());
|
||||
ASSERT_TRUE(stopword_config["continents"].find("china") != stopword_config["continents"].end());
|
||||
ASSERT_TRUE(stopword_config["continents"].find("india") != stopword_config["continents"].end());
|
||||
ASSERT_TRUE(stopword_config["continents"].find("japan") != stopword_config["continents"].end());
|
||||
ASSERT_EQ(3, stopword_config["continents"].stopwords.size());
|
||||
ASSERT_TRUE(stopword_config["continents"].stopwords.find("china") != stopword_config["continents"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["continents"].stopwords.find("india") != stopword_config["continents"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["continents"].stopwords.find("japan") != stopword_config["continents"].stopwords.end());
|
||||
}
|
||||
|
||||
TEST_F(StopwordsManagerTest, StopwordsBasics) {
|
||||
@ -401,12 +399,12 @@ TEST_F(StopwordsManagerTest, ReloadStopwordsOnRestart) {
|
||||
auto stopword_config = stopwordsManager.get_stopwords();
|
||||
ASSERT_TRUE(stopword_config.find("genre") != stopword_config.end());
|
||||
|
||||
ASSERT_EQ(5, stopword_config["genre"].size());
|
||||
ASSERT_TRUE(stopword_config["genre"].find("pop") != stopword_config["genre"].end());
|
||||
ASSERT_TRUE(stopword_config["genre"].find("indie") != stopword_config["genre"].end());
|
||||
ASSERT_TRUE(stopword_config["genre"].find("rock") != stopword_config["genre"].end());
|
||||
ASSERT_TRUE(stopword_config["genre"].find("metal") != stopword_config["genre"].end());
|
||||
ASSERT_TRUE(stopword_config["genre"].find("folk") != stopword_config["genre"].end());
|
||||
ASSERT_EQ(5, stopword_config["genre"].stopwords.size());
|
||||
ASSERT_TRUE(stopword_config["genre"].stopwords.find("pop") != stopword_config["genre"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["genre"].stopwords.find("indie") != stopword_config["genre"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["genre"].stopwords.find("rock") != stopword_config["genre"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["genre"].stopwords.find("metal") != stopword_config["genre"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["genre"].stopwords.find("folk") != stopword_config["genre"].stopwords.end());
|
||||
|
||||
//dispose collection manager and reload all stopwords
|
||||
collectionManager.dispose();
|
||||
@ -424,10 +422,10 @@ TEST_F(StopwordsManagerTest, ReloadStopwordsOnRestart) {
|
||||
stopword_config = stopwordsManager.get_stopwords();
|
||||
ASSERT_TRUE(stopword_config.find("genre") != stopword_config.end());
|
||||
|
||||
ASSERT_EQ(5, stopword_config["genre"].size());
|
||||
ASSERT_TRUE(stopword_config["genre"].find("pop") != stopword_config["genre"].end());
|
||||
ASSERT_TRUE(stopword_config["genre"].find("indie") != stopword_config["genre"].end());
|
||||
ASSERT_TRUE(stopword_config["genre"].find("rock") != stopword_config["genre"].end());
|
||||
ASSERT_TRUE(stopword_config["genre"].find("metal") != stopword_config["genre"].end());
|
||||
ASSERT_TRUE(stopword_config["genre"].find("folk") != stopword_config["genre"].end());
|
||||
ASSERT_EQ(5, stopword_config["genre"].stopwords.size());
|
||||
ASSERT_TRUE(stopword_config["genre"].stopwords.find("pop") != stopword_config["genre"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["genre"].stopwords.find("indie") != stopword_config["genre"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["genre"].stopwords.find("rock") != stopword_config["genre"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["genre"].stopwords.find("metal") != stopword_config["genre"].stopwords.end());
|
||||
ASSERT_TRUE(stopword_config["genre"].stopwords.find("folk") != stopword_config["genre"].stopwords.end());
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user