From 05504b1f4d61b2d3df1bd303aac36a9609dfe674 Mon Sep 17 00:00:00 2001 From: krunal1313 Date: Thu, 13 Jul 2023 15:44:40 +0530 Subject: [PATCH] updating and adding more tests --- src/collection.cpp | 3 +- src/stopwords_manager.cpp | 6 +- test/core_api_utils_test.cpp | 139 ++++++++++++++++++-------------- test/stopwords_manager_test.cpp | 122 ++++++++++++++++++++++++++++ 4 files changed, 207 insertions(+), 63 deletions(-) create mode 100644 test/stopwords_manager_test.cpp diff --git a/src/collection.cpp b/src/collection.cpp index 21d4086a..c48ef95f 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -2433,7 +2433,8 @@ void Collection::parse_search_query(const std::string &query, std::vector StopwordsManager::get_stopword(const std::string& stopword_name, sp return Option(true); } - return Option(404, "Not found."); + return Option(404, "stopword `" + stopword_name +"` not found."); } Option StopwordsManager::upsert_stopword(const std::string& stopword_name, const nlohmann::json& stopwords, const std::string& locale) { @@ -57,6 +57,10 @@ Option StopwordsManager::delete_stopword(const std::string& stopword_name) return Option(500, "Unable to delete from store."); } + if(stopword_configs.find(stopword_name) == stopword_configs.end()) { + return Option(404, "Stopword `" + stopword_name + "` not found."); + } + stopword_configs.erase(stopword_name); return Option(true); } diff --git a/test/core_api_utils_test.cpp b/test/core_api_utils_test.cpp index 9221dbce..523f4d86 100644 --- a/test/core_api_utils_test.cpp +++ b/test/core_api_utils_test.cpp @@ -1173,33 +1173,31 @@ TEST_F(CoreAPIUtilsTest, StopwordsBasics) { FAIL(); } - nlohmann::json body; + req->params["collection"] = "coll1"; + req->params["q"] = "the"; + req->params["query_by"] = "title"; + req->params["stopwords"] = "articles"; - body["searches"] = nlohmann::json::array(); - nlohmann::json search; - search["collection"] = "coll1"; - search["q"] = "the"; - search["query_by"] = "title"; - 
body["searches"].push_back(search); - - req->body = body.dump(); nlohmann::json embedded_params; - embedded_params["stopwords"] = "articles"; - req->embedded_params_vec.push_back(embedded_params); - - post_multi_search(req, res); - nlohmann::json results = nlohmann::json::parse(res->body)["results"][0]; + auto now_ts = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()).count(); + std::string json_results; + auto search_op = collectionManager.do_search(req->params, embedded_params, json_results, now_ts); + if(!search_op.error().empty()) { + LOG(ERROR) << search_op.error(); + } + ASSERT_TRUE(search_op.ok()); + nlohmann::json results = nlohmann::json::parse(json_results); ASSERT_EQ(0, results["hits"].size()); req->params.clear(); - req->embedded_params_vec.clear(); - body.clear(); + json_results.clear(); //when not all words in query are stopwords then it should match the remaining words stopword_value = R"( - {"stopwords": ["america", "europe"], "locale": "en"} - )"_json; + {"stopwords": ["america", "europe"], "locale": "en"} + )"_json; req->params["collection"] = "coll1"; req->params["name"] = "continents"; @@ -1211,54 +1209,73 @@ TEST_F(CoreAPIUtilsTest, StopwordsBasics) { FAIL(); } - body["searches"] = nlohmann::json::array(); - search["collection"] = "coll1"; - search["q"] = "America Man"; - search["query_by"] = "title"; - body["searches"].push_back(search); - - req->body = body.dump(); - embedded_params["stopwords"] = "continents"; - req->embedded_params_vec.push_back(embedded_params); - - post_multi_search(req, res); - results = nlohmann::json::parse(res->body)["results"][0]; + req->params["q"] = "America Man"; + req->params["query_by"] = "title"; + req->params["stopwords"] = "continents"; + search_op = collectionManager.do_search(req->params, embedded_params, json_results, now_ts); + ASSERT_TRUE(search_op.ok()); + results = nlohmann::json::parse(json_results); ASSERT_EQ(0, results["hits"].size()); req->params.clear(); - 
req->embedded_params_vec.clear(); - body.clear(); + json_results.clear(); req->params["collection"] = "coll1"; + req->params["q"] = "a deadman"; + req->params["query_by"] = "title"; + req->params["stopwords"] = "articles"; - body["searches"] = nlohmann::json::array(); - search["collection"] = "coll1"; - search["q"] = "a deadman"; - search["query_by"] = "title"; - body["searches"].push_back(search); - - req->body = body.dump(); - embedded_params["stopwords"] = "articles"; - req->embedded_params_vec.push_back(embedded_params); - - post_multi_search(req, res); - results = nlohmann::json::parse(res->body)["results"][0]; - + search_op = collectionManager.do_search(req->params, embedded_params, json_results, now_ts); + ASSERT_TRUE(search_op.ok()); + results = nlohmann::json::parse(json_results); ASSERT_EQ(2, results["hits"].size()); + req->params.clear(); + json_results.clear(); + + //try deleting non-existing stopword + req->params["collection"] = "coll1"; + req->params["name"] = "country"; + + result = del_stopword(req, res); + ASSERT_EQ(404, res->status_code); + ASSERT_STREQ("{\"message\": \"stopword `country` not found.\"}", res->body.c_str()); + + req->params.clear(); + json_results.clear(); + + //delete stopword and apply in search + req->params["collection"] = "coll1"; + req->params["name"] = "continents"; + + result = del_stopword(req, res); + if(!result) { + LOG(ERROR) << res->body; + FAIL(); + } + + req->params["collection"] = "coll1"; + req->params["q"] = "America"; + req->params["query_by"] = "title"; + req->params["stopwords"] = "continents"; + + search_op = collectionManager.do_search(req->params, embedded_params, json_results, now_ts); + ASSERT_TRUE(search_op.ok()); + results = nlohmann::json::parse(json_results); + ASSERT_EQ(1, results["hits"].size()); + collectionManager.drop_collection("coll1"); } - TEST_F(CoreAPIUtilsTest, StopwordsValidation) { nlohmann::json schema = R"({ - "name": "coll1", - "fields": [ - {"name": "title", "type": "string" }, -
{"name": "points", "type": "int32" } - ] - })"_json; + "name": "coll1", + "fields": [ + {"name": "title", "type": "string" }, + {"name": "points", "type": "int32" } + ] + })"_json; auto op = collectionManager.create_collection(schema); ASSERT_TRUE(op.ok()); @@ -1268,8 +1285,8 @@ TEST_F(CoreAPIUtilsTest, StopwordsValidation) { std::shared_ptr res = std::make_shared(nullptr); auto stopword_value = R"( - {"stopwords": ["america", "europe"]} - )"_json; + {"stopwords": ["america", "europe"]} + )"_json; req->params["collection"] = "coll1"; req->params["name"] = "continents"; @@ -1277,12 +1294,12 @@ TEST_F(CoreAPIUtilsTest, StopwordsValidation) { auto result = put_upsert_stopword(req, res); ASSERT_EQ(400, res->status_code); - ASSERT_STREQ("{\"message\": \"Parameter `locale` is required\"}", res->body.c_str()); + ASSERT_EQ("{\"message\": \"Parameter `locale` is required\"}", res->body); //with a typo stopword_value = R"( - {"stopword": ["america", "europe"], "locale": "en"} - )"_json; + {"stopword": ["america", "europe"], "locale": "en"} + )"_json; req->params["collection"] = "coll1"; req->params["name"] = "continents"; @@ -1294,8 +1311,8 @@ TEST_F(CoreAPIUtilsTest, StopwordsValidation) { //check for value types stopword_value = R"( - {"stopwords": ["america", "europe"], "locale": 12} - )"_json; + {"stopwords": ["america", "europe"], "locale": 12} + )"_json; req->params["collection"] = "coll1"; req->params["name"] = "continents"; @@ -1306,8 +1323,8 @@ TEST_F(CoreAPIUtilsTest, StopwordsValidation) { ASSERT_STREQ("{\"message\": \"Parameter `locale` is required as string value\"}", res->body.c_str()); stopword_value = R"( - {"stopwords": [1, 5, 2], "locale": "ko"} - )"_json; + {"stopwords": [1, 5, 2], "locale": "ko"} + )"_json; req->params["collection"] = "coll1"; req->params["name"] = "continents"; diff --git a/test/stopwords_manager_test.cpp b/test/stopwords_manager_test.cpp new file mode 100644 index 00000000..d21d3324 --- /dev/null +++ b/test/stopwords_manager_test.cpp @@ 
-0,0 +1,122 @@ +#include <gtest/gtest.h> +#include "include/sparsepp.h" +#include "include/stopwords_manager.h" +#include "include/store.h" + +class StopwordsManagerTest : public ::testing::Test { +protected: + Store *store; + + virtual void SetUp() { + std::string state_dir_path = "/tmp/typesense_test/stopwords_manager"; + LOG(INFO) << "Truncating and creating: " << state_dir_path; + system(("rm -rf "+state_dir_path+" && mkdir -p "+state_dir_path).c_str()); + store = new Store(state_dir_path); + } + + virtual void TearDown() { + delete store; + } +}; + +TEST_F(StopwordsManagerTest, UpsertGetStopwords) { + StopwordsManager stopwordsManager; + stopwordsManager.init(store); + + auto stopwords1 = R"( + {"stopwords": ["america", "europe"]} + )"_json; + + auto upsert_op = stopwordsManager.upsert_stopword("continents", stopwords1["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + auto stopwords2 = R"( + {"stopwords": ["a", "an", "the"]} + )"_json; + + upsert_op = stopwordsManager.upsert_stopword("articles", stopwords2["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + auto stopwords3 = R"( + {"stopwords": ["India", "United States", "Japan", "China"]} + )"_json; + + upsert_op = stopwordsManager.upsert_stopword("countries", stopwords3["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + auto stopword_config = stopwordsManager.get_stopwords(); + ASSERT_EQ(3, stopword_config.size()); //total stopwords set + ASSERT_EQ(3, stopword_config["articles"].size()); + ASSERT_EQ(2, stopword_config["continents"].size()); + ASSERT_EQ(5, stopword_config["countries"].size()); //with tokenization United States will be split into two +} + +TEST_F(StopwordsManagerTest, GetStopword) { + StopwordsManager stopwordsManager; + stopwordsManager.init(store); + + auto stopwords = R"({"stopwords": ["a", "an", "the"]})"_json; + + auto upsert_op = stopwordsManager.upsert_stopword("articles", stopwords["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + spp::sparse_hash_set<std::string> stopwords_set; + + auto
get_op = stopwordsManager.get_stopword("articles", stopwords_set); + ASSERT_TRUE(get_op.ok()); + ASSERT_EQ(3, stopwords_set.size()); + + stopwords_set.clear(); + + //try to fetch non-existing stopword + get_op = stopwordsManager.get_stopword("country", stopwords_set); + ASSERT_FALSE(get_op.ok()); + ASSERT_EQ(404, get_op.code()); + ASSERT_EQ("stopword `country` not found.", get_op.error()); + + //try fetching stopwords with a multi-word token + stopwords = R"({"stopwords": ["India", "United States", "Japan"]})"_json; + + upsert_op = stopwordsManager.upsert_stopword("country", stopwords["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + get_op = stopwordsManager.get_stopword("country", stopwords_set); + ASSERT_TRUE(get_op.ok()); + ASSERT_EQ(4, stopwords_set.size()); //as United States will be tokenized and counted as 2 stopwords +} + +TEST_F(StopwordsManagerTest, DeleteStopword) { + StopwordsManager stopwordsManager; + stopwordsManager.init(store); + + auto stopwords1 = R"( + {"stopwords": ["america", "europe"]} + )"_json; + + auto upsert_op = stopwordsManager.upsert_stopword("continents", stopwords1["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + auto stopwords2 = R"( + {"stopwords": ["a", "an", "the"]} + )"_json; + + upsert_op = stopwordsManager.upsert_stopword("articles", stopwords2["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + spp::sparse_hash_set<std::string> stopwords_set; + + //delete a stopword + auto del_op = stopwordsManager.delete_stopword("articles"); + ASSERT_TRUE(del_op.ok()); + + auto get_op = stopwordsManager.get_stopword("articles", stopwords_set); + ASSERT_FALSE(get_op.ok()); + ASSERT_EQ(404, get_op.code()); + ASSERT_EQ("stopword `articles` not found.", get_op.error()); + + //delete non-existing stopword + del_op = stopwordsManager.delete_stopword("states"); + ASSERT_FALSE(del_op.ok()); + ASSERT_EQ(404, del_op.code()); + ASSERT_EQ("Stopword `states` not found.", del_op.error()); +} \ No newline at end of file