From 05504b1f4d61b2d3df1bd303aac36a9609dfe674 Mon Sep 17 00:00:00 2001 From: krunal1313 Date: Thu, 13 Jul 2023 15:44:40 +0530 Subject: [PATCH] updating and adding more tests --- src/collection.cpp | 3 +- src/stopwords_manager.cpp | 6 +- test/core_api_utils_test.cpp | 139 ++++++++++++++++++-------------- test/stopwords_manager_test.cpp | 122 ++++++++++++++++++++++++++++ 4 files changed, 207 insertions(+), 63 deletions(-) create mode 100644 test/stopwords_manager_test.cpp diff --git a/src/collection.cpp b/src/collection.cpp index 21d4086a..c48ef95f 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -2433,7 +2433,8 @@ void Collection::parse_search_query(const std::string &query, std::vector StopwordsManager::get_stopword(const std::string& stopword_name, sp return Option(true); } - return Option(404, "Not found."); + return Option(404, "stopword `" + stopword_name +"` not found."); } Option StopwordsManager::upsert_stopword(const std::string& stopword_name, const nlohmann::json& stopwords, const std::string& locale) { @@ -57,6 +57,10 @@ Option StopwordsManager::delete_stopword(const std::string& stopword_name) return Option(500, "Unable to delete from store."); } + if(stopword_configs.find(stopword_name) == stopword_configs.end()) { + return Option(404, "Stopword `" + stopword_name + "` not found."); + } + stopword_configs.erase(stopword_name); return Option(true); } diff --git a/test/core_api_utils_test.cpp b/test/core_api_utils_test.cpp index 9221dbce..523f4d86 100644 --- a/test/core_api_utils_test.cpp +++ b/test/core_api_utils_test.cpp @@ -1173,33 +1173,31 @@ TEST_F(CoreAPIUtilsTest, StopwordsBasics) { FAIL(); } - nlohmann::json body; + req->params["collection"] = "coll1"; + req->params["q"] = "the"; + req->params["query_by"] = "title"; + req->params["stopwords"] = "articles"; - body["searches"] = nlohmann::json::array(); - nlohmann::json search; - search["collection"] = "coll1"; - search["q"] = "the"; - search["query_by"] = "title"; - 
body["searches"].push_back(search); - - req->body = body.dump(); nlohmann::json embedded_params; - embedded_params["stopwords"] = "articles"; - req->embedded_params_vec.push_back(embedded_params); - - post_multi_search(req, res); - nlohmann::json results = nlohmann::json::parse(res->body)["results"][0]; + auto now_ts = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()).count(); + std::string json_results; + auto search_op = collectionManager.do_search(req->params, embedded_params, json_results, now_ts); + if(!search_op.error().empty()) { + LOG(ERROR) << search_op.error(); + } + ASSERT_TRUE(search_op.ok()); + nlohmann::json results = nlohmann::json::parse(json_results); ASSERT_EQ(0, results["hits"].size()); req->params.clear(); - req->embedded_params_vec.clear(); - body.clear(); + json_results.clear(); //when not all words in query are stopwords then it should match the remaining words stopword_value = R"( - {"stopwords": ["america", "europe"], "locale": "en"} - )"_json; + {"stopwords": ["america", "europe"], "locale": "en"} + )"_json; req->params["collection"] = "coll1"; req->params["name"] = "continents"; @@ -1211,54 +1209,73 @@ TEST_F(CoreAPIUtilsTest, StopwordsBasics) { FAIL(); } - body["searches"] = nlohmann::json::array(); - search["collection"] = "coll1"; - search["q"] = "America Man"; - search["query_by"] = "title"; - body["searches"].push_back(search); - - req->body = body.dump(); - embedded_params["stopwords"] = "continents"; - req->embedded_params_vec.push_back(embedded_params); - - post_multi_search(req, res); - results = nlohmann::json::parse(res->body)["results"][0]; + req->params["q"] = "America Man"; + req->params["query_by"] = "title"; + req->params["stopwords"] = "continents"; + search_op = collectionManager.do_search(req->params, embedded_params, json_results, now_ts); + ASSERT_TRUE(search_op.ok()); + results = nlohmann::json::parse(json_results); ASSERT_EQ(0, results["hits"].size()); req->params.clear(); - 
req->embedded_params_vec.clear(); - body.clear(); + json_results.clear(); req->params["collection"] = "coll1"; + req->params["q"] = "a deadman"; + req->params["query_by"] = "title"; + req->params["stopwords"] = "articles"; - body["searches"] = nlohmann::json::array(); - search["collection"] = "coll1"; - search["q"] = "a deadman"; - search["query_by"] = "title"; - body["searches"].push_back(search); - - req->body = body.dump(); - embedded_params["stopwords"] = "articles"; - req->embedded_params_vec.push_back(embedded_params); - - post_multi_search(req, res); - results = nlohmann::json::parse(res->body)["results"][0]; - + search_op = collectionManager.do_search(req->params, embedded_params, json_results, now_ts); + ASSERT_TRUE(search_op.ok()); + results = nlohmann::json::parse(json_results); ASSERT_EQ(2, results["hits"].size()); + req->params.clear(); + json_results.clear(); + + //try deleting non-existing stopword + req->params["collection"] = "coll1"; + req->params["name"] = "country"; + + result = del_stopword(req, res); + ASSERT_EQ(404, res->status_code); + ASSERT_STREQ("{\"message\": \"stopword `country` not found.\"}", res->body.c_str()); + + req->params.clear(); + json_results.clear(); + + //delete stopword and apply in search + req->params["collection"] = "coll1"; + req->params["name"] = "continents"; + + result = del_stopword(req, res); + if(!result) { + LOG(ERROR) << res->body; + FAIL(); + } + + req->params["collection"] = "coll1"; + req->params["q"] = "America"; + req->params["query_by"] = "title"; + req->params["stopwords"] = "continents"; + + search_op = collectionManager.do_search(req->params, embedded_params, json_results, now_ts); + ASSERT_TRUE(search_op.ok()); + results = nlohmann::json::parse(json_results); + ASSERT_EQ(1, results["hits"].size()); + collectionManager.drop_collection("coll1"); } - TEST_F(CoreAPIUtilsTest, StopwordsValidation) { nlohmann::json schema = R"({ - "name": "coll1", - "fields": [ - {"name": "title", "type": "string" }, -
{"name": "points", "type": "int32" } - ] - })"_json; + "name": "coll1", + "fields": [ + {"name": "title", "type": "string" }, + {"name": "points", "type": "int32" } + ] + })"_json; auto op = collectionManager.create_collection(schema); ASSERT_TRUE(op.ok()); @@ -1268,8 +1285,8 @@ TEST_F(CoreAPIUtilsTest, StopwordsValidation) { std::shared_ptr res = std::make_shared(nullptr); auto stopword_value = R"( - {"stopwords": ["america", "europe"]} - )"_json; + {"stopwords": ["america", "europe"]} + )"_json; req->params["collection"] = "coll1"; req->params["name"] = "continents"; @@ -1277,12 +1294,12 @@ TEST_F(CoreAPIUtilsTest, StopwordsValidation) { auto result = put_upsert_stopword(req, res); ASSERT_EQ(400, res->status_code); - ASSERT_STREQ("{\"message\": \"Parameter `locale` is required\"}", res->body.c_str()); + ASSERT_EQ("{\"message\": \"Parameter `locale` is required\"}", res->body); //with a typo stopword_value = R"( - {"stopword": ["america", "europe"], "locale": "en"} - )"_json; + {"stopword": ["america", "europe"], "locale": "en"} + )"_json; req->params["collection"] = "coll1"; req->params["name"] = "continents"; @@ -1294,8 +1311,8 @@ TEST_F(CoreAPIUtilsTest, StopwordsValidation) { //check for value types stopword_value = R"( - {"stopwords": ["america", "europe"], "locale": 12} - )"_json; + {"stopwords": ["america", "europe"], "locale": 12} + )"_json; req->params["collection"] = "coll1"; req->params["name"] = "continents"; @@ -1306,8 +1323,8 @@ TEST_F(CoreAPIUtilsTest, StopwordsValidation) { ASSERT_STREQ("{\"message\": \"Parameter `locale` is required as string value\"}", res->body.c_str()); stopword_value = R"( - {"stopwords": [1, 5, 2], "locale": "ko"} - )"_json; + {"stopwords": [1, 5, 2], "locale": "ko"} + )"_json; req->params["collection"] = "coll1"; req->params["name"] = "continents"; diff --git a/test/stopwords_manager_test.cpp b/test/stopwords_manager_test.cpp new file mode 100644 index 00000000..d21d3324 --- /dev/null +++ b/test/stopwords_manager_test.cpp @@ 
-0,0 +1,122 @@ +#include <gtest/gtest.h> +#include "include/sparsepp.h" +#include "include/stopwords_manager.h" +#include "include/store.h" + +class StopwordsManagerTest : public ::testing::Test { +protected: + Store *store; + + virtual void SetUp() { + std::string state_dir_path = "/tmp/typesense_test/stopwords_manager"; + LOG(INFO) << "Truncating and creating: " << state_dir_path; + system(("rm -rf "+state_dir_path+" && mkdir -p "+state_dir_path).c_str()); + store = new Store(state_dir_path); + } + + virtual void TearDown() { + delete store; + } +}; + +TEST_F(StopwordsManagerTest, UpsertGetStopwords) { + StopwordsManager stopwordsManager; + stopwordsManager.init(store); + + auto stopwords1 = R"( + {"stopwords": ["america", "europe"]} + )"_json; + + auto upsert_op = stopwordsManager.upsert_stopword("continents", stopwords1["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + auto stopwords2 = R"( + {"stopwords": ["a", "an", "the"]} + )"_json; + + upsert_op = stopwordsManager.upsert_stopword("articles", stopwords2["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + auto stopwords3 = R"( + {"stopwords": ["India", "United States", "Japan", "China"]} + )"_json; + + upsert_op = stopwordsManager.upsert_stopword("countries", stopwords3["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + auto stopword_config = stopwordsManager.get_stopwords(); + ASSERT_EQ(3, stopword_config.size()); //total stopwords set + ASSERT_EQ(3, stopword_config["articles"].size()); + ASSERT_EQ(2, stopword_config["continents"].size()); + ASSERT_EQ(5, stopword_config["countries"].size()); //with tokenization United States will be split into two +} + +TEST_F(StopwordsManagerTest, GetStopword) { + StopwordsManager stopwordsManager; + stopwordsManager.init(store); + + auto stopwords = R"({"stopwords": ["a", "an", "the"]})"_json; + + auto upsert_op = stopwordsManager.upsert_stopword("articles", stopwords["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + spp::sparse_hash_set<std::string> stopwords_set; + + auto
get_op = stopwordsManager.get_stopword("articles", stopwords_set); + ASSERT_TRUE(get_op.ok()); + ASSERT_EQ(3, stopwords_set.size()); + + stopwords_set.clear(); + + //try to fetch non-existing stopword + get_op = stopwordsManager.get_stopword("country", stopwords_set); + ASSERT_FALSE(get_op.ok()); + ASSERT_EQ(404, get_op.code()); + ASSERT_EQ("stopword `country` not found.", get_op.error()); + + //try fetching stopwords with a multi-word token + stopwords = R"({"stopwords": ["India", "United States", "Japan"]})"_json; + + upsert_op = stopwordsManager.upsert_stopword("country", stopwords["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + get_op = stopwordsManager.get_stopword("country", stopwords_set); + ASSERT_TRUE(get_op.ok()); + ASSERT_EQ(4, stopwords_set.size()); //as United States will be tokenized and counted as 2 stopwords +} + +TEST_F(StopwordsManagerTest, DeleteStopword) { + StopwordsManager stopwordsManager; + stopwordsManager.init(store); + + auto stopwords1 = R"( + {"stopwords": ["america", "europe"]} + )"_json; + + auto upsert_op = stopwordsManager.upsert_stopword("continents", stopwords1["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + auto stopwords2 = R"( + {"stopwords": ["a", "an", "the"]} + )"_json; + + upsert_op = stopwordsManager.upsert_stopword("articles", stopwords2["stopwords"], "en"); + ASSERT_TRUE(upsert_op.ok()); + + spp::sparse_hash_set<std::string> stopwords_set; + + //delete a stopword + auto del_op = stopwordsManager.delete_stopword("articles"); + ASSERT_TRUE(del_op.ok()); + + auto get_op = stopwordsManager.get_stopword("articles", stopwords_set); + ASSERT_FALSE(get_op.ok()); + ASSERT_EQ(404, get_op.code()); + ASSERT_EQ("stopword `articles` not found.", get_op.error()); + + //delete non-existing stopword + del_op = stopwordsManager.delete_stopword("states"); + ASSERT_FALSE(del_op.ok()); + ASSERT_EQ(404, del_op.code()); + ASSERT_EQ("Stopword `states` not found.", del_op.error()); +} \ No newline at end of file