updating and adding more tests

This commit is contained in:
krunal1313 2023-07-13 15:44:40 +05:30
parent a2796450ae
commit 05504b1f4d
4 changed files with 207 additions and 63 deletions

View File

@ -2433,7 +2433,8 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s
if(!stopwords_set.empty()) {
const auto &stopword_op = StopwordsManager::get_instance().get_stopword(stopwords_set, stopwords_list);
if (!stopword_op.ok()) {
LOG(ERROR) << "Error fetching stopword_list for stopword " << stopwords_set << " "<<stopword_op.error();
LOG(ERROR) << stopword_op.error();
LOG(ERROR) << "Error fetching stopword_list for stopword " << stopwords_set;
}
}

View File

@ -19,7 +19,7 @@ Option<bool> StopwordsManager::get_stopword(const std::string& stopword_name, sp
return Option<bool>(true);
}
return Option<bool>(404, "Not found.");
return Option<bool>(404, "stopword `" + stopword_name +"` not found.");
}
Option<bool> StopwordsManager::upsert_stopword(const std::string& stopword_name, const nlohmann::json& stopwords, const std::string& locale) {
@ -57,6 +57,10 @@ Option<bool> StopwordsManager::delete_stopword(const std::string& stopword_name)
return Option<bool>(500, "Unable to delete from store.");
}
if(stopword_configs.find(stopword_name) == stopword_configs.end()) {
return Option<bool>(404, "Stopword `" + stopword_name + "` not found.");
}
stopword_configs.erase(stopword_name);
return Option<bool>(true);
}

View File

@ -1173,33 +1173,31 @@ TEST_F(CoreAPIUtilsTest, StopwordsBasics) {
FAIL();
}
nlohmann::json body;
req->params["collection"] = "coll1";
req->params["q"] = "the";
req->params["query_by"] = "title";
req->params["stopwords"] = "articles";
body["searches"] = nlohmann::json::array();
nlohmann::json search;
search["collection"] = "coll1";
search["q"] = "the";
search["query_by"] = "title";
body["searches"].push_back(search);
req->body = body.dump();
nlohmann::json embedded_params;
embedded_params["stopwords"] = "articles";
req->embedded_params_vec.push_back(embedded_params);
post_multi_search(req, res);
nlohmann::json results = nlohmann::json::parse(res->body)["results"][0];
auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
std::string json_results;
auto search_op = collectionManager.do_search(req->params, embedded_params, json_results, now_ts);
if(!search_op.error().empty()) {
LOG(ERROR) << search_op.error();
}
ASSERT_TRUE(search_op.ok());
nlohmann::json results = nlohmann::json::parse(json_results);
ASSERT_EQ(0, results["hits"].size());
req->params.clear();
req->embedded_params_vec.clear();
body.clear();
json_results.clear();
//when not all words in query are stopwords then it should match the remaining words
stopword_value = R"(
{"stopwords": ["america", "europe"], "locale": "en"}
)"_json;
{"stopwords": ["america", "europe"], "locale": "en"}
)"_json;
req->params["collection"] = "coll1";
req->params["name"] = "continents";
@ -1211,54 +1209,73 @@ TEST_F(CoreAPIUtilsTest, StopwordsBasics) {
FAIL();
}
body["searches"] = nlohmann::json::array();
search["collection"] = "coll1";
search["q"] = "America Man";
search["query_by"] = "title";
body["searches"].push_back(search);
req->body = body.dump();
embedded_params["stopwords"] = "continents";
req->embedded_params_vec.push_back(embedded_params);
post_multi_search(req, res);
results = nlohmann::json::parse(res->body)["results"][0];
req->params["q"] = "America Man";
req->params["query_by"] = "title";
req->params["stopwords"] = "continents";
search_op = collectionManager.do_search(req->params, embedded_params, json_results, now_ts);
ASSERT_TRUE(search_op.ok());
results = nlohmann::json::parse(json_results);
ASSERT_EQ(0, results["hits"].size());
req->params.clear();
req->embedded_params_vec.clear();
body.clear();
json_results.clear();
req->params["collection"] = "coll1";
req->params["q"] = "a deadman";
req->params["query_by"] = "title";
req->params["stopwords"] = "articles";
body["searches"] = nlohmann::json::array();
search["collection"] = "coll1";
search["q"] = "a deadman";
search["query_by"] = "title";
body["searches"].push_back(search);
req->body = body.dump();
embedded_params["stopwords"] = "articles";
req->embedded_params_vec.push_back(embedded_params);
post_multi_search(req, res);
results = nlohmann::json::parse(res->body)["results"][0];
search_op = collectionManager.do_search(req->params, embedded_params, json_results, now_ts);
ASSERT_TRUE(search_op.ok());
results = nlohmann::json::parse(json_results);
ASSERT_EQ(2, results["hits"].size());
req->params.clear();
json_results.clear();
//try deleting nonexisting stopword
req->params["collection"] = "coll1";
req->params["name"] = "country";
result = del_stopword(req, res);
ASSERT_EQ(404, res->status_code);
ASSERT_STREQ("{\"message\": \"stopword `country` not found.\"}", res->body.c_str());
req->params.clear();
json_results.clear();
//delete stopword and apply in search
req->params["collection"] = "coll1";
req->params["name"] = "continents";
result = del_stopword(req, res);
if(!result) {
LOG(ERROR) << res->body;
FAIL();
}
req->params["collection"] = "coll1";
req->params["q"] = "America";
req->params["query_by"] = "title";
req->params["stopwords"] = "continents";
search_op = collectionManager.do_search(req->params, embedded_params, json_results, now_ts);
ASSERT_TRUE(search_op.ok());
results = nlohmann::json::parse(json_results);
ASSERT_EQ(1, results["hits"].size());
collectionManager.drop_collection("coll1");
}
TEST_F(CoreAPIUtilsTest, StopwordsValidation) {
nlohmann::json schema = R"({
"name": "coll1",
"fields": [
{"name": "title", "type": "string" },
{"name": "points", "type": "int32" }
]
})"_json;
"name": "coll1",
"fields": [
{"name": "title", "type": "string" },
{"name": "points", "type": "int32" }
]
})"_json;
auto op = collectionManager.create_collection(schema);
ASSERT_TRUE(op.ok());
@ -1268,8 +1285,8 @@ TEST_F(CoreAPIUtilsTest, StopwordsValidation) {
std::shared_ptr<http_res> res = std::make_shared<http_res>(nullptr);
auto stopword_value = R"(
{"stopwords": ["america", "europe"]}
)"_json;
{"stopwords": ["america", "europe"]}
)"_json;
req->params["collection"] = "coll1";
req->params["name"] = "continents";
@ -1277,12 +1294,12 @@ TEST_F(CoreAPIUtilsTest, StopwordsValidation) {
auto result = put_upsert_stopword(req, res);
ASSERT_EQ(400, res->status_code);
ASSERT_STREQ("{\"message\": \"Parameter `locale` is required\"}", res->body.c_str());
ASSERT_EQ("{\"message\": \"Parameter `locale` is required\"}", res->body);
//with a typo
stopword_value = R"(
{"stopword": ["america", "europe"], "locale": "en"}
)"_json;
{"stopword": ["america", "europe"], "locale": "en"}
)"_json;
req->params["collection"] = "coll1";
req->params["name"] = "continents";
@ -1294,8 +1311,8 @@ TEST_F(CoreAPIUtilsTest, StopwordsValidation) {
//check for value types
stopword_value = R"(
{"stopwords": ["america", "europe"], "locale": 12}
)"_json;
{"stopwords": ["america", "europe"], "locale": 12}
)"_json;
req->params["collection"] = "coll1";
req->params["name"] = "continents";
@ -1306,8 +1323,8 @@ TEST_F(CoreAPIUtilsTest, StopwordsValidation) {
ASSERT_STREQ("{\"message\": \"Parameter `locale` is required as string value\"}", res->body.c_str());
stopword_value = R"(
{"stopwords": [1, 5, 2], "locale": "ko"}
)"_json;
{"stopwords": [1, 5, 2], "locale": "ko"}
)"_json;
req->params["collection"] = "coll1";
req->params["name"] = "continents";

View File

@ -0,0 +1,122 @@
#include <gtest/gtest.h>
#include "include/sparsepp.h"
#include "include/stopwords_manager.h"
#include "include/store.h"
// Fixture that gives every test a freshly wiped on-disk Store to back the
// StopwordsManager under test.
class StopwordsManagerTest : public ::testing::Test {
protected:
    Store *store;

    // Wipe and recreate the state directory, then open a Store over it.
    virtual void SetUp() {
        std::string state_dir_path = "/tmp/typesense_test/stopwords_manager";
        LOG(INFO) << "Truncating and creating: " << state_dir_path;
        const std::string reset_cmd = "rm -rf " + state_dir_path + " && mkdir -p " + state_dir_path;
        system(reset_cmd.c_str());
        store = new Store(state_dir_path);
    }

    // Release the Store opened in SetUp().
    virtual void TearDown() {
        delete store;
    }
};
// Upsert three named stopword sets and verify get_stopwords() reports the
// correct set count and per-set token counts.
TEST_F(StopwordsManagerTest, UpsertGetStopwords) {
    StopwordsManager stopwordsManager;
    stopwordsManager.init(store);

    auto stopwords1 = R"(
        {"stopwords": ["america", "europe"]}
    )"_json;
    ASSERT_TRUE(stopwordsManager.upsert_stopword("continents", stopwords1["stopwords"], "en").ok());

    auto stopwords2 = R"(
        {"stopwords": ["a", "an", "the"]}
    )"_json;
    ASSERT_TRUE(stopwordsManager.upsert_stopword("articles", stopwords2["stopwords"], "en").ok());

    auto stopwords3 = R"(
        {"stopwords": ["India", "United States", "Japan", "China"]}
    )"_json;
    ASSERT_TRUE(stopwordsManager.upsert_stopword("countries", stopwords3["stopwords"], "en").ok());

    auto stopword_config = stopwordsManager.get_stopwords();

    ASSERT_EQ(3, stopword_config.size());              // three named sets in total
    ASSERT_EQ(3, stopword_config["articles"].size());
    ASSERT_EQ(2, stopword_config["continents"].size());
    // "United States" is tokenized into two words, so 4 inputs become 5 entries.
    ASSERT_EQ(5, stopword_config["countries"].size());
}
// Fetch a single stopword set: happy path, 404 on a missing name, and
// tokenization of multi-word entries.
TEST_F(StopwordsManagerTest, GetStopword) {
    StopwordsManager stopwordsManager;
    stopwordsManager.init(store);

    auto stopwords = R"({"stopwords": ["a", "an", "the"]})"_json;
    ASSERT_TRUE(stopwordsManager.upsert_stopword("articles", stopwords["stopwords"], "en").ok());

    spp::sparse_hash_set<std::string> stopwords_set;

    // Existing set is returned with all three tokens.
    auto get_op = stopwordsManager.get_stopword("articles", stopwords_set);
    ASSERT_TRUE(get_op.ok());
    ASSERT_EQ(3, stopwords_set.size());

    stopwords_set.clear();

    // A name that was never upserted must report 404 with a descriptive error.
    get_op = stopwordsManager.get_stopword("country", stopwords_set);
    ASSERT_FALSE(get_op.ok());
    ASSERT_EQ(404, get_op.code());
    ASSERT_EQ("stopword `country` not found.", get_op.error());

    // Multi-word entries get tokenized: "United States" contributes two tokens.
    stopwords = R"({"stopwords": ["India", "United States", "Japan"]})"_json;
    ASSERT_TRUE(stopwordsManager.upsert_stopword("country", stopwords["stopwords"], "en").ok());

    get_op = stopwordsManager.get_stopword("country", stopwords_set);
    ASSERT_TRUE(get_op.ok());
    ASSERT_EQ(4, stopwords_set.size());
}
// Delete an existing stopword set (subsequent get must 404) and verify that
// deleting an unknown name also reports 404.
TEST_F(StopwordsManagerTest, DeleteStopword) {
    StopwordsManager stopwordsManager;
    stopwordsManager.init(store);

    auto stopwords1 = R"(
        {"stopwords": ["america", "europe"]}
    )"_json;
    ASSERT_TRUE(stopwordsManager.upsert_stopword("continents", stopwords1["stopwords"], "en").ok());

    auto stopwords2 = R"(
        {"stopwords": ["a", "an", "the"]}
    )"_json;
    ASSERT_TRUE(stopwordsManager.upsert_stopword("articles", stopwords2["stopwords"], "en").ok());

    spp::sparse_hash_set<std::string> stopwords_set;

    // Deleting an existing set succeeds; fetching it afterwards must 404.
    auto del_op = stopwordsManager.delete_stopword("articles");
    ASSERT_TRUE(del_op.ok());

    auto get_op = stopwordsManager.get_stopword("articles", stopwords_set);
    ASSERT_FALSE(get_op.ok());
    ASSERT_EQ(404, get_op.code());
    ASSERT_EQ("stopword `articles` not found.", get_op.error());

    // Deleting a name that never existed must also 404.
    // NOTE(review): delete_stopword capitalizes "Stopword" while get_stopword
    // uses lowercase — asserts mirror the production messages as-is.
    del_op = stopwordsManager.delete_stopword("states");
    ASSERT_FALSE(del_op.ok());
    ASSERT_EQ(404, del_op.code());
    ASSERT_EQ("Stopword `states` not found.", del_op.error());
}