Merge pull request #1409 from krunal1313/stopword_get_reponse_fix

GET response with locale and validation for empty stopwword list
This commit is contained in:
Kishore Nallan 2023-12-04 20:34:10 +05:30 committed by GitHub
commit a7b3e6c523
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 89 additions and 81 deletions

View File

@ -7,9 +7,30 @@
#include "mutex"
#include "store.h"
struct stopword_struct_t {
std::string id;
spp::sparse_hash_set<std::string> stopwords;
std::string locale;
nlohmann::json to_json() const {
nlohmann::json doc;
doc["id"] = id;
if(!locale.empty()) {
doc["locale"] = locale;
}
for(const auto& stopword : stopwords) {
doc["stopwords"].push_back(stopword);
}
return doc;
}
};
class StopwordsManager{
private:
spp::sparse_hash_map<std::string, spp::sparse_hash_set<std::string>> stopword_configs;
spp::sparse_hash_map<std::string, stopword_struct_t> stopword_configs;
static std::string get_stopword_key(const std::string & stopword_name);
@ -32,9 +53,9 @@ public:
void init(Store* store);
spp::sparse_hash_map<std::string, spp::sparse_hash_set<std::string>> get_stopwords() const;
spp::sparse_hash_map<std::string, stopword_struct_t> get_stopwords() const;
Option<bool> get_stopword(const std::string&, spp::sparse_hash_set<std::string>&) const;
Option<bool> get_stopword(const std::string&, stopword_struct_t&) const;
Option<bool> upsert_stopword(const std::string&, const nlohmann::json&, bool write_to_store=false);

View File

@ -3202,9 +3202,9 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s
q_include_tokens = {query};
} else {
std::vector<std::string> tokens;
spp::sparse_hash_set<std::string> stopwords_list;
stopword_struct_t stopwordStruct;
if(!stopwords_set.empty()) {
const auto &stopword_op = StopwordsManager::get_instance().get_stopword(stopwords_set, stopwords_list);
const auto &stopword_op = StopwordsManager::get_instance().get_stopword(stopwords_set, stopwordStruct);
if (!stopword_op.ok()) {
LOG(ERROR) << stopword_op.error();
LOG(ERROR) << "Error fetching stopword_list for stopword " << stopwords_set;
@ -3221,7 +3221,7 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s
Tokenizer(query, true, false, locale, custom_symbols, token_separators).tokenize(tokens);
}
for (const auto val: stopwords_list) {
for (const auto val: stopwordStruct.stopwords) {
tokens.erase(std::remove(tokens.begin(), tokens.end(), val), tokens.end());
}

View File

@ -2172,16 +2172,12 @@ bool del_preset(const std::shared_ptr<http_req>& req, const std::shared_ptr<http
bool get_stopwords(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
StopwordsManager& stopwordManager = StopwordsManager::get_instance();
const spp::sparse_hash_map<std::string, spp::sparse_hash_set<std::string>>& stopwords = stopwordManager.get_stopwords();
const spp::sparse_hash_map<std::string, stopword_struct_t>& stopwords = stopwordManager.get_stopwords();
nlohmann::json res_json = nlohmann::json::object();
res_json["stopwords"] = nlohmann::json::array();
for(const auto& stopwords_kv: stopwords) {
nlohmann::json stopword;
stopword["id"] = stopwords_kv.first;
for(const auto& val : stopwords_kv.second) {
stopword["stopwords"].push_back(val);
}
auto stopword = stopwords_kv.second.to_json();
res_json["stopwords"].push_back(stopword);
}
@ -2193,8 +2189,8 @@ bool get_stopword(const std::shared_ptr<http_req>& req, const std::shared_ptr<ht
const std::string & stopword_name = req->params["name"];
StopwordsManager& stopwordManager = StopwordsManager::get_instance();
spp::sparse_hash_set<std::string> stopwords;
Option<bool> stopword_op = stopwordManager.get_stopword(stopword_name, stopwords);
stopword_struct_t stopwordStruct;
Option<bool> stopword_op = stopwordManager.get_stopword(stopword_name, stopwordStruct);
if(!stopword_op.ok()) {
res->set(stopword_op.code(), stopword_op.error());
@ -2202,10 +2198,8 @@ bool get_stopword(const std::shared_ptr<http_req>& req, const std::shared_ptr<ht
}
nlohmann::json res_json;
res_json["id"] = stopword_name;
for(const auto& stopword : stopwords) {
res_json["stopwords"].push_back(stopword);
}
res_json["stopwords"] = stopwordStruct.to_json();
res->set_200(res_json.dump());
return true;
@ -2231,7 +2225,7 @@ bool put_upsert_stopword(const std::shared_ptr<http_req>& req, const std::shared
return false;
}
req_json["name"] = stopword_name;
req_json["id"] = stopword_name;
res->set_200(req_json.dump());
return true;
@ -2241,25 +2235,15 @@ bool del_stopword(const std::shared_ptr<http_req>& req, const std::shared_ptr<ht
const std::string & stopword_name = req->params["name"];
StopwordsManager& stopwordManager = StopwordsManager::get_instance();
spp::sparse_hash_set<std::string> stopwords;
Option<bool> stopword_op = stopwordManager.get_stopword(stopword_name, stopwords);
if(!stopword_op.ok()) {
res->set(stopword_op.code(), stopword_op.error());
return false;
}
Option<bool> delete_op = stopwordManager.delete_stopword(stopword_name);
if(!delete_op.ok()) {
res->set_500(delete_op.error());
res->set(delete_op.code(), delete_op.error());
return false;
}
nlohmann::json res_json;
res_json["id"] = stopword_name;
for(const auto& stopword : stopwords) {
res_json["stopwords"].push_back(stopword);
}
res->set_200(res_json.dump());
return true;

View File

@ -5,17 +5,17 @@ void StopwordsManager::init(Store* _store) {
store = _store;
}
spp::sparse_hash_map<std::string, spp::sparse_hash_set<std::string>> StopwordsManager::get_stopwords() const {
spp::sparse_hash_map<std::string, stopword_struct_t> StopwordsManager::get_stopwords() const {
std::shared_lock lock(mutex);
return stopword_configs;
}
Option<bool> StopwordsManager::get_stopword(const std::string& stopword_name, spp::sparse_hash_set<std::string>& stopwords) const {
Option<bool> StopwordsManager::get_stopword(const std::string& stopword_name, stopword_struct_t& stopwords_struct) const {
std::shared_lock lock(mutex);
const auto& it = stopword_configs.find(stopword_name);
if(it != stopword_configs.end()) {
stopwords = it->second;
stopwords_struct = it->second;
return Option<bool>(true);
}
@ -34,6 +34,10 @@ Option<bool> StopwordsManager::upsert_stopword(const std::string& stopword_name,
return Option<bool>(400, (std::string("Parameter `") + STOPWORD_VALUES + "` is required"));
}
if(stopwords_json[STOPWORD_VALUES].empty()) {
return Option<bool>(400, (std::string("Parameter `") + STOPWORD_VALUES + "` is empty"));
}
if((!stopwords_json[STOPWORD_VALUES].is_array()) || (!stopwords_json[STOPWORD_VALUES][0].is_string())) {
return Option<bool>(400, (std::string("Parameter `") + STOPWORD_VALUES + "` is required as string array value"));
}
@ -65,7 +69,7 @@ Option<bool> StopwordsManager::upsert_stopword(const std::string& stopword_name,
}
tokens.clear();
}
stopword_configs[stopword_name] = stopwords_set;
stopword_configs[stopword_name] = stopword_struct_t{stopword_name, stopwords_set, locale};
return Option<bool>(true);
}
@ -76,16 +80,17 @@ std::string StopwordsManager::get_stopword_key(const std::string& stopword_name)
Option<bool> StopwordsManager::delete_stopword(const std::string& stopword_name) {
std::unique_lock lock(mutex);
bool removed = store->remove(get_stopword_key(stopword_name));
if(!removed) {
return Option<bool>(500, "Unable to delete from store.");
}
if(stopword_configs.find(stopword_name) == stopword_configs.end()) {
return Option<bool>(404, "Stopword `" + stopword_name + "` not found.");
}
stopword_configs.erase(stopword_name);
bool removed = store->remove(get_stopword_key(stopword_name));
if(!removed) {
return Option<bool>(500, "Unable to delete from store.");
}
return Option<bool>(true);
}

View File

@ -56,21 +56,21 @@ TEST_F(StopwordsManagerTest, UpsertGetStopwords) {
ASSERT_TRUE(stopword_config.find("articles") != stopword_config.end());
ASSERT_TRUE(stopword_config.find("continents") != stopword_config.end());
ASSERT_EQ(3, stopword_config["articles"].size());
ASSERT_TRUE(stopword_config["articles"].find("a") != stopword_config["articles"].end());
ASSERT_TRUE(stopword_config["articles"].find("an") != stopword_config["articles"].end());
ASSERT_TRUE(stopword_config["articles"].find("the") != stopword_config["articles"].end());
ASSERT_EQ(3, stopword_config["articles"].stopwords.size());
ASSERT_TRUE(stopword_config["articles"].stopwords.find("a") != stopword_config["articles"].stopwords.end());
ASSERT_TRUE(stopword_config["articles"].stopwords.find("an") != stopword_config["articles"].stopwords.end());
ASSERT_TRUE(stopword_config["articles"].stopwords.find("the") != stopword_config["articles"].stopwords.end());
ASSERT_EQ(2, stopword_config["continents"].size());
ASSERT_TRUE(stopword_config["continents"].find("america") != stopword_config["continents"].end());
ASSERT_TRUE(stopword_config["continents"].find("europe") != stopword_config["continents"].end());
ASSERT_EQ(2, stopword_config["continents"].stopwords.size());
ASSERT_TRUE(stopword_config["continents"].stopwords.find("america") != stopword_config["continents"].stopwords.end());
ASSERT_TRUE(stopword_config["continents"].stopwords.find("europe") != stopword_config["continents"].stopwords.end());
ASSERT_EQ(5, stopword_config["countries"].size()); //with tokenization United States will be splited into two
ASSERT_TRUE(stopword_config["countries"].find("india") != stopword_config["countries"].end());
ASSERT_TRUE(stopword_config["countries"].find("united") != stopword_config["countries"].end());
ASSERT_TRUE(stopword_config["countries"].find("states") != stopword_config["countries"].end());
ASSERT_TRUE(stopword_config["countries"].find("china") != stopword_config["countries"].end());
ASSERT_TRUE(stopword_config["countries"].find("japan") != stopword_config["countries"].end());
ASSERT_EQ(5, stopword_config["countries"].stopwords.size()); //with tokenization United States will be splited into two
ASSERT_TRUE(stopword_config["countries"].stopwords.find("india") != stopword_config["countries"].stopwords.end());
ASSERT_TRUE(stopword_config["countries"].stopwords.find("united") != stopword_config["countries"].stopwords.end());
ASSERT_TRUE(stopword_config["countries"].stopwords.find("states") != stopword_config["countries"].stopwords.end());
ASSERT_TRUE(stopword_config["countries"].stopwords.find("china") != stopword_config["countries"].stopwords.end());
ASSERT_TRUE(stopword_config["countries"].stopwords.find("japan") != stopword_config["countries"].stopwords.end());
}
TEST_F(StopwordsManagerTest, GetStopword) {
@ -79,16 +79,14 @@ TEST_F(StopwordsManagerTest, GetStopword) {
auto upsert_op = stopwordsManager.upsert_stopword("articles", stopwords);
ASSERT_TRUE(upsert_op.ok());
spp::sparse_hash_set<std::string> stopwords_set;
stopword_struct_t stopwordStruct;
auto get_op = stopwordsManager.get_stopword("articles", stopwords_set);
auto get_op = stopwordsManager.get_stopword("articles", stopwordStruct);
ASSERT_TRUE(get_op.ok());
ASSERT_EQ(3, stopwords_set.size());
stopwords_set.clear();
ASSERT_EQ(3, stopwordStruct.stopwords.size());
//try to fetch non-existing stopword
get_op = stopwordsManager.get_stopword("country", stopwords_set);
get_op = stopwordsManager.get_stopword("country", stopwordStruct);
ASSERT_FALSE(get_op.ok());
ASSERT_EQ(404, get_op.code());
ASSERT_EQ("Stopword `country` not found.", get_op.error());
@ -99,9 +97,9 @@ TEST_F(StopwordsManagerTest, GetStopword) {
upsert_op = stopwordsManager.upsert_stopword("country", stopwords);
ASSERT_TRUE(upsert_op.ok());
get_op = stopwordsManager.get_stopword("country", stopwords_set);
get_op = stopwordsManager.get_stopword("country", stopwordStruct);
ASSERT_TRUE(get_op.ok());
ASSERT_EQ(4, stopwords_set.size()); //as United States will be tokenized and counted 2 stopwords
ASSERT_EQ(4, stopwordStruct.stopwords.size()); //as United States will be tokenized and counted 2 stopwords
}
TEST_F(StopwordsManagerTest, DeleteStopword) {
@ -119,13 +117,13 @@ TEST_F(StopwordsManagerTest, DeleteStopword) {
upsert_op = stopwordsManager.upsert_stopword("articles", stopwords2);
ASSERT_TRUE(upsert_op.ok());
spp::sparse_hash_set<std::string> stopwords_set;
stopword_struct_t stopwordStruct;
//delete a stopword
auto del_op = stopwordsManager.delete_stopword("articles");
ASSERT_TRUE(del_op.ok());
auto get_op = stopwordsManager.get_stopword("articles", stopwords_set);
auto get_op = stopwordsManager.get_stopword("articles", stopwordStruct);
ASSERT_FALSE(get_op.ok());
ASSERT_EQ(404, get_op.code());
ASSERT_EQ("Stopword `articles` not found.", get_op.error());
@ -147,9 +145,9 @@ TEST_F(StopwordsManagerTest, UpdateStopword) {
auto stopword_config = stopwordsManager.get_stopwords();
ASSERT_EQ(2, stopword_config["continents"].size());
ASSERT_TRUE(stopword_config["continents"].find("america") != stopword_config["continents"].end());
ASSERT_TRUE(stopword_config["continents"].find("europe") != stopword_config["continents"].end());
ASSERT_EQ(2, stopword_config["continents"].stopwords.size());
ASSERT_TRUE(stopword_config["continents"].stopwords.find("america") != stopword_config["continents"].stopwords.end());
ASSERT_TRUE(stopword_config["continents"].stopwords.find("europe") != stopword_config["continents"].stopwords.end());
//adding new words with same name should replace the stopwords set
stopwords_json = R"(
@ -160,10 +158,10 @@ TEST_F(StopwordsManagerTest, UpdateStopword) {
stopword_config = stopwordsManager.get_stopwords();
ASSERT_EQ(3, stopword_config["continents"].size());
ASSERT_TRUE(stopword_config["continents"].find("china") != stopword_config["continents"].end());
ASSERT_TRUE(stopword_config["continents"].find("india") != stopword_config["continents"].end());
ASSERT_TRUE(stopword_config["continents"].find("japan") != stopword_config["continents"].end());
ASSERT_EQ(3, stopword_config["continents"].stopwords.size());
ASSERT_TRUE(stopword_config["continents"].stopwords.find("china") != stopword_config["continents"].stopwords.end());
ASSERT_TRUE(stopword_config["continents"].stopwords.find("india") != stopword_config["continents"].stopwords.end());
ASSERT_TRUE(stopword_config["continents"].stopwords.find("japan") != stopword_config["continents"].stopwords.end());
}
TEST_F(StopwordsManagerTest, StopwordsBasics) {
@ -401,12 +399,12 @@ TEST_F(StopwordsManagerTest, ReloadStopwordsOnRestart) {
auto stopword_config = stopwordsManager.get_stopwords();
ASSERT_TRUE(stopword_config.find("genre") != stopword_config.end());
ASSERT_EQ(5, stopword_config["genre"].size());
ASSERT_TRUE(stopword_config["genre"].find("pop") != stopword_config["genre"].end());
ASSERT_TRUE(stopword_config["genre"].find("indie") != stopword_config["genre"].end());
ASSERT_TRUE(stopword_config["genre"].find("rock") != stopword_config["genre"].end());
ASSERT_TRUE(stopword_config["genre"].find("metal") != stopword_config["genre"].end());
ASSERT_TRUE(stopword_config["genre"].find("folk") != stopword_config["genre"].end());
ASSERT_EQ(5, stopword_config["genre"].stopwords.size());
ASSERT_TRUE(stopword_config["genre"].stopwords.find("pop") != stopword_config["genre"].stopwords.end());
ASSERT_TRUE(stopword_config["genre"].stopwords.find("indie") != stopword_config["genre"].stopwords.end());
ASSERT_TRUE(stopword_config["genre"].stopwords.find("rock") != stopword_config["genre"].stopwords.end());
ASSERT_TRUE(stopword_config["genre"].stopwords.find("metal") != stopword_config["genre"].stopwords.end());
ASSERT_TRUE(stopword_config["genre"].stopwords.find("folk") != stopword_config["genre"].stopwords.end());
//dispose collection manager and reload all stopwords
collectionManager.dispose();
@ -424,10 +422,10 @@ TEST_F(StopwordsManagerTest, ReloadStopwordsOnRestart) {
stopword_config = stopwordsManager.get_stopwords();
ASSERT_TRUE(stopword_config.find("genre") != stopword_config.end());
ASSERT_EQ(5, stopword_config["genre"].size());
ASSERT_TRUE(stopword_config["genre"].find("pop") != stopword_config["genre"].end());
ASSERT_TRUE(stopword_config["genre"].find("indie") != stopword_config["genre"].end());
ASSERT_TRUE(stopword_config["genre"].find("rock") != stopword_config["genre"].end());
ASSERT_TRUE(stopword_config["genre"].find("metal") != stopword_config["genre"].end());
ASSERT_TRUE(stopword_config["genre"].find("folk") != stopword_config["genre"].end());
ASSERT_EQ(5, stopword_config["genre"].stopwords.size());
ASSERT_TRUE(stopword_config["genre"].stopwords.find("pop") != stopword_config["genre"].stopwords.end());
ASSERT_TRUE(stopword_config["genre"].stopwords.find("indie") != stopword_config["genre"].stopwords.end());
ASSERT_TRUE(stopword_config["genre"].stopwords.find("rock") != stopword_config["genre"].stopwords.end());
ASSERT_TRUE(stopword_config["genre"].stopwords.find("metal") != stopword_config["genre"].stopwords.end());
ASSERT_TRUE(stopword_config["genre"].stopwords.find("folk") != stopword_config["genre"].stopwords.end());
}