diff --git a/include/stopwords_manager.h b/include/stopwords_manager.h index a2f0ec11..45f087e0 100644 --- a/include/stopwords_manager.h +++ b/include/stopwords_manager.h @@ -7,9 +7,30 @@ #include "mutex" #include "store.h" +struct stopword_struct_t { + std::string id; + spp::sparse_hash_set stopwords; + std::string locale; + + nlohmann::json to_json() const { + nlohmann::json doc; + + doc["id"] = id; + if(!locale.empty()) { + doc["locale"] = locale; + } + + for(const auto& stopword : stopwords) { + doc["stopwords"].push_back(stopword); + } + + return doc; + } +}; + class StopwordsManager{ private: - spp::sparse_hash_map> stopword_configs; + spp::sparse_hash_map stopword_configs; static std::string get_stopword_key(const std::string & stopword_name); @@ -32,9 +53,9 @@ public: void init(Store* store); - spp::sparse_hash_map> get_stopwords() const; + spp::sparse_hash_map get_stopwords() const; - Option get_stopword(const std::string&, spp::sparse_hash_set&) const; + Option get_stopword(const std::string&, stopword_struct_t&) const; Option upsert_stopword(const std::string&, const nlohmann::json&, bool write_to_store=false); diff --git a/src/collection.cpp b/src/collection.cpp index ec019d18..7fc281b8 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -3202,9 +3202,9 @@ void Collection::parse_search_query(const std::string &query, std::vector tokens; - spp::sparse_hash_set stopwords_list; + stopword_struct_t stopwordStruct; if(!stopwords_set.empty()) { - const auto &stopword_op = StopwordsManager::get_instance().get_stopword(stopwords_set, stopwords_list); + const auto &stopword_op = StopwordsManager::get_instance().get_stopword(stopwords_set, stopwordStruct); if (!stopword_op.ok()) { LOG(ERROR) << stopword_op.error(); LOG(ERROR) << "Error fetching stopword_list for stopword " << stopwords_set; @@ -3221,7 +3221,7 @@ void Collection::parse_search_query(const std::string &query, std::vector& req, const std::shared_ptr& req, const std::shared_ptr& res) { StopwordsManager& stopwordManager = StopwordsManager::get_instance(); - const spp::sparse_hash_map>& stopwords = stopwordManager.get_stopwords(); + const spp::sparse_hash_map& stopwords = stopwordManager.get_stopwords(); nlohmann::json res_json = nlohmann::json::object(); res_json["stopwords"] = nlohmann::json::array(); for(const auto& stopwords_kv: stopwords) { - nlohmann::json stopword; - stopword["id"] = stopwords_kv.first; - for(const auto& val : stopwords_kv.second) { - stopword["stopwords"].push_back(val); - } + auto stopword = stopwords_kv.second.to_json(); res_json["stopwords"].push_back(stopword); } @@ -2193,8 +2189,8 @@ bool get_stopword(const std::shared_ptr& req, const std::shared_ptrparams["name"]; StopwordsManager& stopwordManager = StopwordsManager::get_instance(); - spp::sparse_hash_set stopwords; - Option stopword_op = stopwordManager.get_stopword(stopword_name, stopwords); + stopword_struct_t stopwordStruct; + Option stopword_op = stopwordManager.get_stopword(stopword_name, stopwordStruct); if(!stopword_op.ok()) { res->set(stopword_op.code(), stopword_op.error()); @@ -2202,10 +2198,8 @@ bool get_stopword(const std::shared_ptr& req, const std::shared_ptrset_200(res_json.dump()); return true; @@ -2231,7 +2225,7 @@ bool put_upsert_stopword(const std::shared_ptr& req, const std::shared return false; } - req_json["name"] = stopword_name; + req_json["id"] = stopword_name; res->set_200(req_json.dump()); return true; @@ -2241,25 +2235,15 @@ bool del_stopword(const std::shared_ptr& req, const std::shared_ptrparams["name"]; StopwordsManager& stopwordManager = StopwordsManager::get_instance(); - spp::sparse_hash_set stopwords; - Option stopword_op = stopwordManager.get_stopword(stopword_name, stopwords); - if(!stopword_op.ok()) { - res->set(stopword_op.code(), stopword_op.error()); - return false; - } - Option delete_op = stopwordManager.delete_stopword(stopword_name); if(!delete_op.ok()) { - res->set_500(delete_op.error()); + res->set(delete_op.code(), delete_op.error()); return false; } nlohmann::json res_json; res_json["id"] = stopword_name; - for(const auto& stopword : stopwords) { - res_json["stopwords"].push_back(stopword); - } res->set_200(res_json.dump()); return true; diff --git a/src/stopwords_manager.cpp b/src/stopwords_manager.cpp index af72c39f..30744e46 100644 --- a/src/stopwords_manager.cpp +++ b/src/stopwords_manager.cpp @@ -5,17 +5,17 @@ void StopwordsManager::init(Store* _store) { store = _store; } -spp::sparse_hash_map> StopwordsManager::get_stopwords() const { +spp::sparse_hash_map StopwordsManager::get_stopwords() const { std::shared_lock lock(mutex); return stopword_configs; } -Option StopwordsManager::get_stopword(const std::string& stopword_name, spp::sparse_hash_set& stopwords) const { +Option StopwordsManager::get_stopword(const std::string& stopword_name, stopword_struct_t& stopwords_struct) const { std::shared_lock lock(mutex); const auto& it = stopword_configs.find(stopword_name); if(it != stopword_configs.end()) { - stopwords = it->second; + stopwords_struct = it->second; return Option(true); } @@ -34,6 +34,10 @@ Option StopwordsManager::upsert_stopword(const std::string& stopword_name, return Option(400, (std::string("Parameter `") + STOPWORD_VALUES + "` is required")); } + if(stopwords_json[STOPWORD_VALUES].empty()) { + return Option(400, (std::string("Parameter `") + STOPWORD_VALUES + "` is empty")); + } + if((!stopwords_json[STOPWORD_VALUES].is_array()) || (!stopwords_json[STOPWORD_VALUES][0].is_string())) { return Option(400, (std::string("Parameter `") + STOPWORD_VALUES + "` is required as string array value")); } @@ -65,7 +69,7 @@ Option StopwordsManager::upsert_stopword(const std::string& stopword_name, } tokens.clear(); } - stopword_configs[stopword_name] = stopwords_set; + stopword_configs[stopword_name] = stopword_struct_t{stopword_name, stopwords_set, locale}; return Option(true); } @@ -76,16 +80,17 @@ std::string StopwordsManager::get_stopword_key(const std::string& stopword_name) Option StopwordsManager::delete_stopword(const std::string& stopword_name) { std::unique_lock lock(mutex); - bool removed = store->remove(get_stopword_key(stopword_name)); - if(!removed) { - return Option(500, "Unable to delete from store."); - } - if(stopword_configs.find(stopword_name) == stopword_configs.end()) { return Option(404, "Stopword `" + stopword_name + "` not found."); } stopword_configs.erase(stopword_name); + + bool removed = store->remove(get_stopword_key(stopword_name)); + if(!removed) { + return Option(500, "Unable to delete from store."); + } + return Option(true); } diff --git a/test/stopwords_manager_test.cpp b/test/stopwords_manager_test.cpp index bb06eb10..ee1a03ac 100644 --- a/test/stopwords_manager_test.cpp +++ b/test/stopwords_manager_test.cpp @@ -56,21 +56,21 @@ TEST_F(StopwordsManagerTest, UpsertGetStopwords) { ASSERT_TRUE(stopword_config.find("articles") != stopword_config.end()); ASSERT_TRUE(stopword_config.find("continents") != stopword_config.end()); - ASSERT_EQ(3, stopword_config["articles"].size()); - ASSERT_TRUE(stopword_config["articles"].find("a") != stopword_config["articles"].end()); - ASSERT_TRUE(stopword_config["articles"].find("an") != stopword_config["articles"].end()); - ASSERT_TRUE(stopword_config["articles"].find("the") != stopword_config["articles"].end()); + ASSERT_EQ(3, stopword_config["articles"].stopwords.size()); + ASSERT_TRUE(stopword_config["articles"].stopwords.find("a") != stopword_config["articles"].stopwords.end()); + ASSERT_TRUE(stopword_config["articles"].stopwords.find("an") != stopword_config["articles"].stopwords.end()); + ASSERT_TRUE(stopword_config["articles"].stopwords.find("the") != stopword_config["articles"].stopwords.end()); - ASSERT_EQ(2, stopword_config["continents"].size()); - ASSERT_TRUE(stopword_config["continents"].find("america") != stopword_config["continents"].end()); - ASSERT_TRUE(stopword_config["continents"].find("europe") != stopword_config["continents"].end()); + ASSERT_EQ(2, stopword_config["continents"].stopwords.size()); + ASSERT_TRUE(stopword_config["continents"].stopwords.find("america") != stopword_config["continents"].stopwords.end()); + ASSERT_TRUE(stopword_config["continents"].stopwords.find("europe") != stopword_config["continents"].stopwords.end()); - ASSERT_EQ(5, stopword_config["countries"].size()); //with tokenization United States will be splited into two - ASSERT_TRUE(stopword_config["countries"].find("india") != stopword_config["countries"].end()); - ASSERT_TRUE(stopword_config["countries"].find("united") != stopword_config["countries"].end()); - ASSERT_TRUE(stopword_config["countries"].find("states") != stopword_config["countries"].end()); - ASSERT_TRUE(stopword_config["countries"].find("china") != stopword_config["countries"].end()); - ASSERT_TRUE(stopword_config["countries"].find("japan") != stopword_config["countries"].end()); + ASSERT_EQ(5, stopword_config["countries"].stopwords.size()); //with tokenization United States will be splited into two + ASSERT_TRUE(stopword_config["countries"].stopwords.find("india") != stopword_config["countries"].stopwords.end()); + ASSERT_TRUE(stopword_config["countries"].stopwords.find("united") != stopword_config["countries"].stopwords.end()); + ASSERT_TRUE(stopword_config["countries"].stopwords.find("states") != stopword_config["countries"].stopwords.end()); + ASSERT_TRUE(stopword_config["countries"].stopwords.find("china") != stopword_config["countries"].stopwords.end()); + ASSERT_TRUE(stopword_config["countries"].stopwords.find("japan") != stopword_config["countries"].stopwords.end()); } TEST_F(StopwordsManagerTest, GetStopword) { @@ -79,16 +79,14 @@ TEST_F(StopwordsManagerTest, GetStopword) { auto upsert_op = stopwordsManager.upsert_stopword("articles", stopwords); ASSERT_TRUE(upsert_op.ok()); - spp::sparse_hash_set stopwords_set; + stopword_struct_t stopwordStruct; - auto get_op = stopwordsManager.get_stopword("articles", stopwords_set); + auto get_op = stopwordsManager.get_stopword("articles", stopwordStruct); ASSERT_TRUE(get_op.ok()); - ASSERT_EQ(3, stopwords_set.size()); - - stopwords_set.clear(); + ASSERT_EQ(3, stopwordStruct.stopwords.size()); //try to fetch non-existing stopword - get_op = stopwordsManager.get_stopword("country", stopwords_set); + get_op = stopwordsManager.get_stopword("country", stopwordStruct); ASSERT_FALSE(get_op.ok()); ASSERT_EQ(404, get_op.code()); ASSERT_EQ("Stopword `country` not found.", get_op.error()); @@ -99,9 +97,9 @@ TEST_F(StopwordsManagerTest, GetStopword) { upsert_op = stopwordsManager.upsert_stopword("country", stopwords); ASSERT_TRUE(upsert_op.ok()); - get_op = stopwordsManager.get_stopword("country", stopwords_set); + get_op = stopwordsManager.get_stopword("country", stopwordStruct); ASSERT_TRUE(get_op.ok()); - ASSERT_EQ(4, stopwords_set.size()); //as United States will be tokenized and counted 2 stopwords + ASSERT_EQ(4, stopwordStruct.stopwords.size()); //as United States will be tokenized and counted 2 stopwords } TEST_F(StopwordsManagerTest, DeleteStopword) { @@ -119,13 +117,13 @@ TEST_F(StopwordsManagerTest, DeleteStopword) { upsert_op = stopwordsManager.upsert_stopword("articles", stopwords2); ASSERT_TRUE(upsert_op.ok()); - spp::sparse_hash_set stopwords_set; + stopword_struct_t stopwordStruct; //delete a stopword auto del_op = stopwordsManager.delete_stopword("articles"); ASSERT_TRUE(del_op.ok()); - auto get_op = stopwordsManager.get_stopword("articles", stopwords_set); + auto get_op = stopwordsManager.get_stopword("articles", stopwordStruct); ASSERT_FALSE(get_op.ok()); ASSERT_EQ(404, get_op.code()); ASSERT_EQ("Stopword `articles` not found.", get_op.error()); @@ -147,9 +145,9 @@ TEST_F(StopwordsManagerTest, UpdateStopword) { auto stopword_config = stopwordsManager.get_stopwords(); - ASSERT_EQ(2, stopword_config["continents"].size()); - ASSERT_TRUE(stopword_config["continents"].find("america") != stopword_config["continents"].end()); - ASSERT_TRUE(stopword_config["continents"].find("europe") != stopword_config["continents"].end()); + ASSERT_EQ(2, stopword_config["continents"].stopwords.size()); + ASSERT_TRUE(stopword_config["continents"].stopwords.find("america") != stopword_config["continents"].stopwords.end()); + ASSERT_TRUE(stopword_config["continents"].stopwords.find("europe") != stopword_config["continents"].stopwords.end()); //adding new words with same name should replace the stopwords set stopwords_json = R"( @@ -160,10 +158,10 @@ TEST_F(StopwordsManagerTest, UpdateStopword) { stopword_config = stopwordsManager.get_stopwords(); - ASSERT_EQ(3, stopword_config["continents"].size()); - ASSERT_TRUE(stopword_config["continents"].find("china") != stopword_config["continents"].end()); - ASSERT_TRUE(stopword_config["continents"].find("india") != stopword_config["continents"].end()); - ASSERT_TRUE(stopword_config["continents"].find("japan") != stopword_config["continents"].end()); + ASSERT_EQ(3, stopword_config["continents"].stopwords.size()); + ASSERT_TRUE(stopword_config["continents"].stopwords.find("china") != stopword_config["continents"].stopwords.end()); + ASSERT_TRUE(stopword_config["continents"].stopwords.find("india") != stopword_config["continents"].stopwords.end()); + ASSERT_TRUE(stopword_config["continents"].stopwords.find("japan") != stopword_config["continents"].stopwords.end()); } TEST_F(StopwordsManagerTest, StopwordsBasics) { @@ -401,12 +399,12 @@ TEST_F(StopwordsManagerTest, ReloadStopwordsOnRestart) { auto stopword_config = stopwordsManager.get_stopwords(); ASSERT_TRUE(stopword_config.find("genre") != stopword_config.end()); - ASSERT_EQ(5, stopword_config["genre"].size()); - ASSERT_TRUE(stopword_config["genre"].find("pop") != stopword_config["genre"].end()); - ASSERT_TRUE(stopword_config["genre"].find("indie") != stopword_config["genre"].end()); - ASSERT_TRUE(stopword_config["genre"].find("rock") != stopword_config["genre"].end()); - ASSERT_TRUE(stopword_config["genre"].find("metal") != stopword_config["genre"].end()); - ASSERT_TRUE(stopword_config["genre"].find("folk") != stopword_config["genre"].end()); + ASSERT_EQ(5, stopword_config["genre"].stopwords.size()); + ASSERT_TRUE(stopword_config["genre"].stopwords.find("pop") != stopword_config["genre"].stopwords.end()); + ASSERT_TRUE(stopword_config["genre"].stopwords.find("indie") != stopword_config["genre"].stopwords.end()); + ASSERT_TRUE(stopword_config["genre"].stopwords.find("rock") != stopword_config["genre"].stopwords.end()); + ASSERT_TRUE(stopword_config["genre"].stopwords.find("metal") != stopword_config["genre"].stopwords.end()); + ASSERT_TRUE(stopword_config["genre"].stopwords.find("folk") != stopword_config["genre"].stopwords.end()); //dispose collection manager and reload all stopwords collectionManager.dispose(); @@ -424,10 +422,10 @@ TEST_F(StopwordsManagerTest, ReloadStopwordsOnRestart) { stopword_config = stopwordsManager.get_stopwords(); ASSERT_TRUE(stopword_config.find("genre") != stopword_config.end()); - ASSERT_EQ(5, stopword_config["genre"].size()); - ASSERT_TRUE(stopword_config["genre"].find("pop") != stopword_config["genre"].end()); - ASSERT_TRUE(stopword_config["genre"].find("indie") != stopword_config["genre"].end()); - ASSERT_TRUE(stopword_config["genre"].find("rock") != stopword_config["genre"].end()); - ASSERT_TRUE(stopword_config["genre"].find("metal") != stopword_config["genre"].end()); - ASSERT_TRUE(stopword_config["genre"].find("folk") != stopword_config["genre"].end()); + ASSERT_EQ(5, stopword_config["genre"].stopwords.size()); + ASSERT_TRUE(stopword_config["genre"].stopwords.find("pop") != stopword_config["genre"].stopwords.end()); + ASSERT_TRUE(stopword_config["genre"].stopwords.find("indie") != stopword_config["genre"].stopwords.end()); + ASSERT_TRUE(stopword_config["genre"].stopwords.find("rock") != stopword_config["genre"].stopwords.end()); + ASSERT_TRUE(stopword_config["genre"].stopwords.find("metal") != stopword_config["genre"].stopwords.end()); + ASSERT_TRUE(stopword_config["genre"].stopwords.find("folk") != stopword_config["genre"].stopwords.end()); }