pagination for synonyms (#1591)

* pagination for synonyms

* return synonym_op directly
This commit is contained in:
Krunal Gandhi 2024-03-01 15:53:02 +00:00 committed by GitHub
parent 3f5386eb77
commit cc815297ef
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 227 additions and 24 deletions

View File

@ -626,7 +626,7 @@ public:
// synonym operations
spp::sparse_hash_map<std::string, synonym_t> get_synonyms();
Option<spp::sparse_hash_map<std::string, synonym_t*>> get_synonyms(uint32_t limit=0, uint32_t offset=0);
bool get_synonym(const std::string& id, synonym_t& synonym);

View File

@ -70,7 +70,7 @@ public:
void synonym_reduction(const std::vector<std::string>& tokens,
std::vector<std::vector<std::string>>& results) const;
spp::sparse_hash_map<std::string, synonym_t> get_synonyms();
Option<spp::sparse_hash_map<std::string, synonym_t*>> get_synonyms(uint32_t limit=0, uint32_t offset=0);
bool get_synonym(const std::string& id, synonym_t& synonym);

View File

@ -4750,9 +4750,14 @@ Option<std::map<std::string, override_t*>> Collection::get_overrides(uint32_t li
return Option<std::map<std::string, override_t*>>(overrides_map);
}
spp::sparse_hash_map<std::string, synonym_t> Collection::get_synonyms() {
Option<spp::sparse_hash_map<std::string, synonym_t*>> Collection::get_synonyms(uint32_t limit, uint32_t offset) {
    // Returns up to `limit` synonyms starting at `offset` into the synonym
    // index's definition map (limit == 0 means "no limit", offset == 0 means
    // "from the beginning").
    //
    // The index already returns Option<sparse_hash_map<string, synonym_t*>>,
    // so both the success and the error (code + message) cases can be
    // propagated directly — re-wrapping the error into a new Option of the
    // same type was redundant.
    std::shared_lock lock(mutex);
    return synonym_index->get_synonyms(limit, offset);
}
SynonymIndex* Collection::get_synonym_index() {

View File

@ -2345,9 +2345,9 @@ Option<Collection*> CollectionManager::clone_collection(const string& existing_n
Collection* new_coll = coll_create_op.get();
// copy synonyms
auto synonyms = existing_coll->get_synonyms();
auto synonyms = existing_coll->get_synonyms().get();
for(const auto& synonym: synonyms) {
new_coll->get_synonym_index()->add_synonym(new_name, synonym.second);
new_coll->get_synonym_index()->add_synonym(new_name, *synonym.second);
}
// copy overrides

View File

@ -2047,13 +2047,37 @@ bool get_synonyms(const std::shared_ptr<http_req>& req, const std::shared_ptr<ht
return false;
}
uint32_t offset = 0, limit = 0;
if(req->params.count("offset") != 0) {
const auto &offset_str = req->params["offset"];
if(!StringUtils::is_uint32_t(offset_str)) {
res->set(400, "Offset param should be unsigned integer.");
return false;
}
offset = std::stoi(offset_str);
}
if(req->params.count("limit") != 0) {
const auto &limit_str = req->params["limit"];
if(!StringUtils::is_uint32_t(limit_str)) {
res->set(400, "Limit param should be unsigned integer.");
return false;
}
limit = std::stoi(limit_str);
}
nlohmann::json res_json;
res_json["synonyms"] = nlohmann::json::array();
const auto& synonyms = collection->get_synonyms();
auto synonyms_op = collection->get_synonyms(limit, offset);
if(!synonyms_op.ok()) {
res->set(synonyms_op.code(), synonyms_op.error());
return false;
}
const auto synonyms = synonyms_op.get();
for(const auto & kv: synonyms) {
nlohmann::json synonym = kv.second.to_view_json();
res_json["synonyms"].push_back(synonym);
res_json["synonyms"].push_back(kv.second->to_view_json());
}
res->set_200(res_json.dump());

View File

@ -179,9 +179,32 @@ Option<bool> SynonymIndex::remove_synonym(const std::string & collection_name, c
return Option<bool>(404, "Could not find that `id`.");
}
spp::sparse_hash_map<std::string, synonym_t> SynonymIndex::get_synonyms() {
Option<spp::sparse_hash_map<std::string, synonym_t*>> SynonymIndex::get_synonyms(uint32_t limit, uint32_t offset) {
    // Returns a page of synonyms as pointers into `synonym_definitions`.
    // `offset` skips that many entries (error 400 if it is past the end);
    // `limit` caps the page size, with 0 meaning "no limit".
    //
    // NOTE(review): pagination is relative to the hash map's internal
    // iteration order, which is not sorted — presumably stable for a fixed
    // set of definitions, but pages can shuffle after inserts/removals.
    // NOTE(review): the returned pointers reference entries owned by this
    // index; they are only valid until a synonym is added or removed —
    // callers must not retain them across mutations. TODO confirm callers
    // consume the map immediately.
    std::shared_lock lock(mutex);
    spp::sparse_hash_map<std::string, synonym_t*> synonyms_map;

    auto synonym_it = synonym_definitions.begin();
    if(offset > 0) {
        if(offset >= synonym_definitions.size()) {
            return Option<spp::sparse_hash_map<std::string, synonym_t*>>(400, "Invalid offset param.");
        }
        std::advance(synonym_it, offset);
    }

    // Cap the range at `limit` entries only when the requested page fits;
    // otherwise return everything from `offset` to the end.
    auto synonym_end = synonym_definitions.end();
    if(limit > 0 && (offset + limit < synonym_definitions.size())) {
        synonym_end = synonym_it;
        std::advance(synonym_end, limit);
    }

    // Fixed: the original `for(synonym_it; ...)` had a no-op first clause
    // (an expression statement with no effect, flagged by -Wunused-value).
    for(; synonym_it != synonym_end; ++synonym_it) {
        synonyms_map[synonym_it->first] = &synonym_it->second;
    }

    return Option<spp::sparse_hash_map<std::string, synonym_t*>>(synonyms_map);
}
std::string SynonymIndex::get_synonym_key(const std::string & collection_name, const std::string & synonym_id) {

View File

@ -525,16 +525,16 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
ASSERT_STREQ("exclude-rule", collection1->get_overrides().get()["exclude-rule"]->id.c_str());
ASSERT_STREQ("include-rule", collection1->get_overrides().get()["include-rule"]->id.c_str());
const auto& synonyms = collection1->get_synonyms();
const auto& synonyms = collection1->get_synonyms().get();
ASSERT_EQ(2, synonyms.size());
ASSERT_STREQ("id1", synonyms.at("id1").id.c_str());
ASSERT_EQ(2, synonyms.at("id1").root.size());
ASSERT_EQ(1, synonyms.at("id1").synonyms.size());
ASSERT_STREQ("id1", synonyms.at("id1")->id.c_str());
ASSERT_EQ(2, synonyms.at("id1")->root.size());
ASSERT_EQ(1, synonyms.at("id1")->synonyms.size());
ASSERT_STREQ("id3", synonyms.at("id3").id.c_str());
ASSERT_EQ(0, synonyms.at("id3").root.size());
ASSERT_EQ(2, synonyms.at("id3").synonyms.size());
ASSERT_STREQ("id3", synonyms.at("id3")->id.c_str());
ASSERT_EQ(0, synonyms.at("id3")->root.size());
ASSERT_EQ(2, synonyms.at("id3")->synonyms.size());
std::vector<char> expected_symbols = {'+'};
std::vector<char> expected_separators = {'-'};
@ -1408,7 +1408,7 @@ TEST_F(CollectionManagerTest, CloneCollection) {
ASSERT_FALSE(coll2 == nullptr);
ASSERT_EQ("coll2", coll2->get_name());
ASSERT_EQ(1, coll2->get_fields().size());
ASSERT_EQ(1, coll2->get_synonyms().size());
ASSERT_EQ(1, coll2->get_synonyms().get().size());
ASSERT_EQ(1, coll2->get_overrides().get().size());
ASSERT_EQ("", coll2->get_fallback_field_type());

View File

@ -700,11 +700,11 @@ TEST_F(CollectionSynonymsTest, DeleteAndUpsertDuplicationOfSynonms) {
coll_mul_fields->add_synonym(R"({"id": "ipod-synonyms", "root": "ipod", "synonyms": ["i pod", "ipod"]})"_json);
coll_mul_fields->add_synonym(R"({"id": "samsung-synonyms", "root": "s3", "synonyms": ["s3 phone", "samsung"]})"_json);
ASSERT_EQ(2, coll_mul_fields->get_synonyms().size());
ASSERT_EQ(2, coll_mul_fields->get_synonyms().get().size());
coll_mul_fields->remove_synonym("ipod-synonyms");
ASSERT_EQ(1, coll_mul_fields->get_synonyms().size());
ASSERT_STREQ("samsung-synonyms", coll_mul_fields->get_synonyms()["samsung-synonyms"].id.c_str());
ASSERT_EQ(1, coll_mul_fields->get_synonyms().get().size());
ASSERT_STREQ("samsung-synonyms", coll_mul_fields->get_synonyms().get()["samsung-synonyms"]->id.c_str());
// try to upsert synonym with same ID
@ -712,7 +712,7 @@ TEST_F(CollectionSynonymsTest, DeleteAndUpsertDuplicationOfSynonms) {
"synonyms": ["s3 phone", "samsung"]})"_json);
ASSERT_TRUE(upsert_op.ok());
ASSERT_EQ(1, coll_mul_fields->get_synonyms().size());
ASSERT_EQ(1, coll_mul_fields->get_synonyms().get().size());
synonym_t synonym2_updated;
coll_mul_fields->get_synonym("samsung-synonyms", synonym2_updated);
@ -721,7 +721,7 @@ TEST_F(CollectionSynonymsTest, DeleteAndUpsertDuplicationOfSynonms) {
ASSERT_EQ("smartphone", synonym2_updated.root[1]);
coll_mul_fields->remove_synonym("samsung-synonyms");
ASSERT_EQ(0, coll_mul_fields->get_synonyms().size());
ASSERT_EQ(0, coll_mul_fields->get_synonyms().get().size());
}
TEST_F(CollectionSynonymsTest, SynonymJsonSerialization) {
@ -1066,4 +1066,97 @@ TEST_F(CollectionSynonymsTest, MultipleSynonymSubstitution) {
res = coll2->search("suit man", {"title", "gender"}, "", {},
{}, {0}, 10, 1, FREQUENCY, {true}, 0).get();
ASSERT_EQ(1, res["hits"].size());
}
// Exercises SynonymIndex pagination via Collection::get_synonyms(limit, offset):
// limit-only, limit+offset, offset-only (limit=0 => unlimited), last-element,
// oversized-limit, tail-truncated page, and the invalid-offset error path.
//
// NOTE(review): the expected id order (foobar4, foobar5, foobar1, foobar3,
// foobar2) reflects spp::sparse_hash_map's internal iteration order for this
// exact insertion sequence — presumably deterministic for a fixed build, but
// it is not sorted order; confirm before relying on it elsewhere.
TEST_F(CollectionSynonymsTest, SynonymsPagination) {
Collection *coll3;
std::vector<field> fields = {field("title", field_types::STRING, false),
field("points", field_types::INT32, false)};
coll3 = collectionManager.get_collection("coll3").get();
if (coll3 == nullptr) {
coll3 = collectionManager.create_collection("coll3", 1, fields, "points").get();
}
// seed five synonyms: foobar1 .. foobar5
for (int i = 0; i < 5; ++i) {
nlohmann::json synonym_json = R"(
{
"id": "foobar",
"synonyms": ["blazer", "suit"]
})"_json;
synonym_json["id"] = synonym_json["id"].get<std::string>() + std::to_string(i + 1);
coll3->add_synonym(synonym_json);
}
uint32_t limit = 0, offset = 0;
// limit synonyms to 2 per page (offset defaults to 0)
limit = 2;
auto synonym_op = coll3->get_synonyms(limit);
auto synonym_map = synonym_op.get();
auto it = synonym_map.begin();
ASSERT_EQ(2, synonym_map.size());
ASSERT_EQ("foobar4", it->second->id); it++;
ASSERT_EQ("foobar5", it->second->id);
// get 2 synonyms starting from offset 3
offset = 3;
synonym_op = coll3->get_synonyms(limit, offset);
synonym_map = synonym_op.get();
it = synonym_map.begin();
ASSERT_EQ(2, synonym_map.size());
ASSERT_EQ("foobar3", it->second->id); it++;
ASSERT_EQ("foobar2", it->second->id);
// get all synonyms except the first (limit 0 means no limit)
offset = 1;
limit = 0;
synonym_op = coll3->get_synonyms(limit, offset);
synonym_map = synonym_op.get();
it = synonym_map.begin();
ASSERT_EQ(4, synonym_map.size());
ASSERT_EQ("foobar5", it->second->id); it++;
ASSERT_EQ("foobar1", it->second->id); it++;
ASSERT_EQ("foobar3", it->second->id); it++;
ASSERT_EQ("foobar2", it->second->id); it++;
// get only the last synonym
offset = 4, limit = 1;
synonym_op = coll3->get_synonyms(limit, offset);
synonym_map = synonym_op.get();
it = synonym_map.begin();
ASSERT_EQ(1, synonym_map.size());
ASSERT_EQ("foobar2", it->second->id);
// if limit is greater than the number of synonyms, return all from offset
offset = 0;
limit = 8;
synonym_op = coll3->get_synonyms(limit, offset);
synonym_map = synonym_op.get();
it = synonym_map.begin();
ASSERT_EQ(5, synonym_map.size());
ASSERT_EQ("foobar4", it->second->id); it++;
ASSERT_EQ("foobar5", it->second->id); it++;
ASSERT_EQ("foobar1", it->second->id); it++;
ASSERT_EQ("foobar3", it->second->id); it++;
ASSERT_EQ("foobar2", it->second->id); it++;
// page extends past the end: only the remaining 2 are returned
offset = 3;
limit = 4;
synonym_op = coll3->get_synonyms(limit, offset);
synonym_map = synonym_op.get();
it = synonym_map.begin();
ASSERT_EQ(2, synonym_map.size());
ASSERT_EQ("foobar3", it->second->id); it++;
ASSERT_EQ("foobar2", it->second->id);
// invalid offset (>= total count) yields a 400-style error Option
offset = 6;
limit = 0;
synonym_op = coll3->get_synonyms(limit, offset);
ASSERT_FALSE(synonym_op.ok());
ASSERT_EQ("Invalid offset param.", synonym_op.error());
}

View File

@ -1694,6 +1694,64 @@ TEST_F(CoreAPIUtilsTest, OverridesPagination) {
ASSERT_EQ(400, resp->status_code);
ASSERT_EQ("{\"message\": \"Offset param should be unsigned integer.\"}", resp->body);
//invalid limit string
req->params["offset"] = "0";
req->params["limit"] = "-1";
get_collections(req, resp);
ASSERT_EQ(400, resp->status_code);
ASSERT_EQ("{\"message\": \"Limit param should be unsigned integer.\"}", resp->body);
}
TEST_F(CoreAPIUtilsTest, SynonymsPagination) {
Collection *coll3;
std::vector<field> fields = {field("title", field_types::STRING, false),
field("points", field_types::INT32, false)};
coll3 = collectionManager.get_collection("coll3").get();
if (coll3 == nullptr) {
coll3 = collectionManager.create_collection("coll3", 1, fields, "points").get();
}
for (int i = 0; i < 5; ++i) {
nlohmann::json synonym_json = R"(
{
"id": "foobar",
"synonyms": ["blazer", "suit"]
})"_json;
synonym_json["id"] = synonym_json["id"].get<std::string>() + std::to_string(i + 1);
coll3->add_synonym(synonym_json);
}
auto req = std::make_shared<http_req>();
auto resp = std::make_shared<http_res>(nullptr);
req->params["collection"] = "coll3";
req->params["offset"] = "0";
req->params["limit"] = "1";
get_synonyms(req, resp);
nlohmann::json expected_json = R"({
"synonyms":[
{
"id":"foobar4",
"root":"",
"synonyms":["blazer","suit"]
}]
})"_json;
ASSERT_EQ(expected_json.dump(), resp->body);
//invalid offset string
req->params["offset"] = "0a";
get_collections(req, resp);
ASSERT_EQ(400, resp->status_code);
ASSERT_EQ("{\"message\": \"Offset param should be unsigned integer.\"}", resp->body);
//invalid limit string
req->params["offset"] = "0";
req->params["limit"] = "-1";