mirror of
https://github.com/typesense/typesense.git
synced 2025-05-17 04:02:36 +08:00
Fix synonym loading regression.
This commit is contained in:
parent
7a63d930bf
commit
44d489ed54
@ -10,19 +10,20 @@
|
||||
|
||||
struct synonym_t {
|
||||
std::string id;
|
||||
|
||||
std::string raw_root;
|
||||
// used in code and differs from API + storage format
|
||||
std::vector<std::string> root;
|
||||
|
||||
std::vector<std::string> raw_synonyms;
|
||||
// used in code and differs from API + storage format
|
||||
std::vector<std::vector<std::string>> synonyms;
|
||||
|
||||
std::string locale;
|
||||
std::vector<char> symbols;
|
||||
|
||||
synonym_t() = default;
|
||||
|
||||
synonym_t(const std::string& id, const std::vector<std::string>& root,
|
||||
const std::vector<std::vector<std::string>>& synonyms):
|
||||
id(id), root(root), synonyms(synonyms) {
|
||||
|
||||
}
|
||||
|
||||
nlohmann::json to_view_json() const;
|
||||
|
||||
static Option<bool> parse(const nlohmann::json& synonym_json, synonym_t& syn);
|
||||
|
@ -1579,6 +1579,27 @@ bool put_synonym(const std::shared_ptr<http_req>& req, const std::shared_ptr<htt
|
||||
return false;
|
||||
}
|
||||
|
||||
// These checks should be inside `add_synonym` but older versions of Typesense wrongly persisted
|
||||
// `root` as an array, so we have to do it here so that on-disk synonyms are loaded properly
|
||||
if(syn_json.count("root") != 0 && !syn_json["root"].is_string()) {
|
||||
res->set_400("Key `root` should be a string.");
|
||||
return false;
|
||||
}
|
||||
|
||||
if(syn_json.count("synonyms") && syn_json["synonyms"].is_array()) {
|
||||
if(syn_json["synonyms"].empty()) {
|
||||
res->set_400("Could not find a valid string array of `synonyms`");
|
||||
return false;
|
||||
}
|
||||
|
||||
for(const auto& synonym: syn_json["synonyms"]) {
|
||||
if (!synonym.is_string() || synonym.empty()) {
|
||||
res->set_400("Could not find a valid string array of `synonyms`");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
syn_json["id"] = synonym_id;
|
||||
Option<bool> upsert_op = collection->add_synonym(syn_json);
|
||||
|
||||
|
@ -194,10 +194,6 @@ Option<bool> synonym_t::parse(const nlohmann::json& synonym_json, synonym_t& syn
|
||||
return Option<bool>(400, "Could not find an array of `synonyms`");
|
||||
}
|
||||
|
||||
if(synonym_json.count("root") != 0 && !synonym_json["root"].is_string()) {
|
||||
return Option<bool>(400, "Key `root` should be a string.");
|
||||
}
|
||||
|
||||
if (!synonym_json["synonyms"].is_array() || synonym_json["synonyms"].empty()) {
|
||||
return Option<bool>(400, "Could not find an array of `synonyms`");
|
||||
}
|
||||
@ -228,17 +224,51 @@ Option<bool> synonym_t::parse(const nlohmann::json& synonym_json, synonym_t& syn
|
||||
|
||||
if(synonym_json.count("root") != 0) {
|
||||
std::vector<std::string> tokens;
|
||||
Tokenizer(synonym_json["root"], true, false, syn.locale, syn.symbols).tokenize(tokens);
|
||||
|
||||
if(synonym_json["root"].is_string()) {
|
||||
Tokenizer(synonym_json["root"].get<std::string>(), true, false, syn.locale, syn.symbols).tokenize(tokens);
|
||||
syn.raw_root = synonym_json["root"].get<std::string>();
|
||||
} else if(synonym_json["root"].is_array()) {
|
||||
// Typesense 0.23.1 and below incorrectly stored root as array
|
||||
for(const auto& root_ele: synonym_json["root"]) {
|
||||
if(!root_ele.is_string()) {
|
||||
return Option<bool>(400, "Synonym root is not valid.");
|
||||
}
|
||||
|
||||
tokens.push_back(root_ele.get<std::string>());
|
||||
}
|
||||
|
||||
syn.raw_root = StringUtils::join(tokens, " ");
|
||||
} else {
|
||||
return Option<bool>(400, "Key `root` should be a string.");
|
||||
}
|
||||
|
||||
syn.root = tokens;
|
||||
}
|
||||
|
||||
for(const auto& synonym: synonym_json["synonyms"]) {
|
||||
if(!synonym.is_string() || synonym == "") {
|
||||
std::vector<std::string> tokens;
|
||||
if(synonym.is_string()) {
|
||||
Tokenizer(synonym.get<std::string>(), true, false, syn.locale, syn.symbols).tokenize(tokens);
|
||||
syn.raw_synonyms.push_back(synonym.get<std::string>());
|
||||
} else if(synonym.is_array()) {
|
||||
// Typesense 0.23.1 and below incorrectly stored synonym as array
|
||||
if(synonym.empty()) {
|
||||
return Option<bool>(400, "Could not find a valid string array of `synonyms`");
|
||||
}
|
||||
|
||||
for(const auto& ele: synonym) {
|
||||
if(!ele.is_string() || ele.get<std::string>().empty()) {
|
||||
return Option<bool>(400, "Could not find a valid string array of `synonyms`");
|
||||
}
|
||||
tokens.push_back(ele.get<std::string>());
|
||||
}
|
||||
|
||||
syn.raw_synonyms.push_back(StringUtils::join(tokens, " "));
|
||||
} else {
|
||||
return Option<bool>(400, "Could not find a valid string array of `synonyms`");
|
||||
}
|
||||
|
||||
std::vector<std::string> tokens;
|
||||
Tokenizer(synonym, true, false, syn.locale, syn.symbols).tokenize(tokens);
|
||||
syn.synonyms.push_back(tokens);
|
||||
}
|
||||
|
||||
@ -249,12 +279,12 @@ Option<bool> synonym_t::parse(const nlohmann::json& synonym_json, synonym_t& syn
|
||||
nlohmann::json synonym_t::to_view_json() const {
|
||||
nlohmann::json obj;
|
||||
obj["id"] = id;
|
||||
obj["root"] = StringUtils::join(root, " ");
|
||||
obj["root"] = raw_root;
|
||||
|
||||
obj["synonyms"] = nlohmann::json::array();
|
||||
|
||||
for(const auto& synonym: synonyms) {
|
||||
obj["synonyms"].push_back(StringUtils::join(synonym, " "));
|
||||
for(const auto& synonym: raw_synonyms) {
|
||||
obj["synonyms"].push_back(synonym);
|
||||
}
|
||||
|
||||
if(!locale.empty()) {
|
||||
|
@ -406,13 +406,9 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
|
||||
collection1->remove_override("deleted-rule");
|
||||
|
||||
// make some synonym operation
|
||||
synonym_t synonym1("id1", {"smart", "phone"}, {{"iphone"}});
|
||||
synonym_t synonym2("id2", {"mobile", "phone"}, {{"samsung", "phone"}});
|
||||
synonym_t synonym3("id3", {}, {{"football"}, {"foot", "ball"}});
|
||||
|
||||
ASSERT_TRUE(collection1->add_synonym(synonym1.to_view_json()).ok());
|
||||
ASSERT_TRUE(collection1->add_synonym(synonym2.to_view_json()).ok());
|
||||
ASSERT_TRUE(collection1->add_synonym(synonym3.to_view_json()).ok());
|
||||
ASSERT_TRUE(collection1->add_synonym(R"({"id": "id1", "root": "smart phone", "synonyms": ["iphone"]})"_json).ok());
|
||||
ASSERT_TRUE(collection1->add_synonym(R"({"id": "id2", "root": "mobile phone", "synonyms": ["samsung phone"]})"_json).ok());
|
||||
ASSERT_TRUE(collection1->add_synonym(R"({"id": "id3", "synonyms": ["football", "foot ball"]})"_json).ok());
|
||||
|
||||
collection1->remove_synonym("id2");
|
||||
|
||||
|
@ -2064,12 +2064,9 @@ TEST_F(CollectionOverrideTest, DynamicFilteringWithSynonyms) {
|
||||
ASSERT_TRUE(coll1->add(doc2.dump()).ok());
|
||||
ASSERT_TRUE(coll1->add(doc3.dump()).ok());
|
||||
|
||||
synonym_t synonym1{"sneakers-shoes", {"sneakers"}, {{"shoes"}} };
|
||||
synonym_t synonym2{"boots-shoes", {"boots"}, {{"shoes"}} };
|
||||
synonym_t synonym3{"exciting-amazing", {"exciting"}, {{"amazing"}} };
|
||||
coll1->add_synonym(synonym1.to_view_json());
|
||||
coll1->add_synonym(synonym2.to_view_json());
|
||||
coll1->add_synonym(synonym3.to_view_json());
|
||||
coll1->add_synonym(R"({"id": "sneakers-shoes", "root": "sneakers", "synonyms": ["shoes"]})"_json);
|
||||
coll1->add_synonym(R"({"id": "boots-shoes", "root": "boots", "synonyms": ["shoes"]})"_json);
|
||||
coll1->add_synonym(R"({"id": "exciting-amazing", "root": "exciting", "synonyms": ["amazing"]})"_json);
|
||||
|
||||
std::vector<sort_by> sort_fields = { sort_by("_text_match", "DESC"), sort_by("points", "DESC") };
|
||||
|
||||
@ -2514,8 +2511,7 @@ TEST_F(CollectionOverrideTest, SynonymsAppliedToOverridenQuery) {
|
||||
|
||||
coll1->add_override(override_contains);
|
||||
|
||||
synonym_t synonym1{"shoes-sneakers", {"shoes"}, {{"sneakers"}} };
|
||||
coll1->add_synonym(synonym1.to_view_json());
|
||||
coll1->add_synonym(R"({"id": "", "root": "shoes", "synonyms": ["sneakers"]})"_json);
|
||||
|
||||
auto results = coll1->search("expensive shoes", {"name"}, "",
|
||||
{}, sort_fields, {2}, 10, 1, FREQUENCY, {true}, 0).get();
|
||||
|
@ -123,26 +123,16 @@ TEST_F(CollectionSynonymsTest, SynonymParsingFromJson) {
|
||||
|
||||
// synonyms bad type
|
||||
|
||||
nlohmann::json syn_json_bad_type1 = {
|
||||
{"id", "syn-1"},
|
||||
{"root", "Ocean"},
|
||||
{"synonyms", {"Sea", 1} }
|
||||
};
|
||||
nlohmann::json syn_json_bad_type1 = R"({
|
||||
"id": "syn-1",
|
||||
"root": "Ocean",
|
||||
"synonyms": [["Sea", 1]]
|
||||
})"_json;
|
||||
|
||||
syn_op = synonym_t::parse(syn_json_bad_type1, synonym);
|
||||
ASSERT_FALSE(syn_op.ok());
|
||||
ASSERT_STREQ("Could not find a valid string array of `synonyms`", syn_op.error().c_str());
|
||||
|
||||
nlohmann::json syn_json_bad_type2 = {
|
||||
{"id", "syn-1"},
|
||||
{"root", "Ocean"},
|
||||
{"synonyms", "foo" }
|
||||
};
|
||||
|
||||
syn_op = synonym_t::parse(syn_json_bad_type2, synonym);
|
||||
ASSERT_FALSE(syn_op.ok());
|
||||
ASSERT_STREQ("Could not find an array of `synonyms`", syn_op.error().c_str());
|
||||
|
||||
nlohmann::json syn_json_bad_type3 = {
|
||||
{"id", "syn-1"},
|
||||
{"root", "Ocean"},
|
||||
@ -154,11 +144,11 @@ TEST_F(CollectionSynonymsTest, SynonymParsingFromJson) {
|
||||
ASSERT_STREQ("Could not find an array of `synonyms`", syn_op.error().c_str());
|
||||
|
||||
// empty string in synonym list
|
||||
nlohmann::json syn_json_bad_type4 = {
|
||||
{"id", "syn-1"},
|
||||
{"root", "Ocean"},
|
||||
{"synonyms", {""} }
|
||||
};
|
||||
nlohmann::json syn_json_bad_type4 = R"({
|
||||
"id": "syn-1",
|
||||
"root": "Ocean",
|
||||
"synonyms": [["Foo", ""]]
|
||||
})"_json;
|
||||
|
||||
syn_op = synonym_t::parse(syn_json_bad_type4, synonym);
|
||||
ASSERT_FALSE(syn_op.ok());
|
||||
@ -707,10 +697,8 @@ TEST_F(CollectionSynonymsTest, SynonymFieldOrdering) {
|
||||
}
|
||||
|
||||
TEST_F(CollectionSynonymsTest, DeleteAndUpsertDuplicationOfSynonms) {
|
||||
synonym_t synonym1{"ipod-synonyms", {}, {{"ipod"}, {"i", "pod"}, {"pod"}}};
|
||||
synonym_t synonym2{"samsung-synonyms", {}, {{"s3"}, {"s3", "phone"}, {"samsung"}}};
|
||||
coll_mul_fields->add_synonym(synonym1.to_view_json());
|
||||
coll_mul_fields->add_synonym(synonym2.to_view_json());
|
||||
coll_mul_fields->add_synonym(R"({"id": "ipod-synonyms", "root": "ipod", "synonyms": ["i pod", "ipod"]})"_json);
|
||||
coll_mul_fields->add_synonym(R"({"id": "samsung-synonyms", "root": "s3", "synonyms": ["s3 phone", "samsung"]})"_json);
|
||||
|
||||
ASSERT_EQ(2, coll_mul_fields->get_synonyms().size());
|
||||
coll_mul_fields->remove_synonym("ipod-synonyms");
|
||||
@ -720,14 +708,14 @@ TEST_F(CollectionSynonymsTest, DeleteAndUpsertDuplicationOfSynonms) {
|
||||
|
||||
// try to upsert synonym with same ID
|
||||
|
||||
synonym2.root = {"s3", "smartphone"};
|
||||
auto upsert_op = coll_mul_fields->add_synonym(synonym2.to_view_json());
|
||||
auto upsert_op = coll_mul_fields->add_synonym(R"({"id": "samsung-synonyms", "root": "s3 smartphone",
|
||||
"synonyms": ["s3 phone", "samsung"]})"_json);
|
||||
ASSERT_TRUE(upsert_op.ok());
|
||||
|
||||
ASSERT_EQ(1, coll_mul_fields->get_synonyms().size());
|
||||
|
||||
synonym_t synonym2_updated;
|
||||
coll_mul_fields->get_synonym(synonym2.id, synonym2_updated);
|
||||
coll_mul_fields->get_synonym("samsung-synonyms", synonym2_updated);
|
||||
|
||||
ASSERT_EQ("s3", synonym2_updated.root[0]);
|
||||
ASSERT_EQ("smartphone", synonym2_updated.root[1]);
|
||||
@ -737,7 +725,16 @@ TEST_F(CollectionSynonymsTest, DeleteAndUpsertDuplicationOfSynonms) {
|
||||
}
|
||||
|
||||
TEST_F(CollectionSynonymsTest, SynonymJsonSerialization) {
|
||||
synonym_t synonym1{"ipod-synonyms", {"apple", "ipod"}, {{"ipod"}, {"i", "pod"}, {"pod"}}};
|
||||
synonym_t synonym1;
|
||||
synonym1.id = "ipod-synonyms";
|
||||
synonym1.root = {"apple", "ipod"};
|
||||
synonym1.raw_root = "apple ipod";
|
||||
|
||||
synonym1.raw_synonyms = {"ipod", "i pod", "pod"};
|
||||
synonym1.synonyms.push_back({"ipod"});
|
||||
synonym1.synonyms.push_back({"i", "pod"});
|
||||
synonym1.synonyms.push_back({"pod"});
|
||||
|
||||
nlohmann::json obj = synonym1.to_view_json();
|
||||
ASSERT_STREQ("ipod-synonyms", obj["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("apple ipod", obj["root"].get<std::string>().c_str());
|
||||
@ -817,8 +814,9 @@ TEST_F(CollectionSynonymsTest, SynonymExpansionAndCompressionRanking) {
|
||||
ASSERT_TRUE(coll1->add(doc.dump()).ok());
|
||||
}
|
||||
|
||||
synonym_t synonym1{"syn-1", {"lululemon"}, {{"lulu", "lemon"}}};
|
||||
coll1->add_synonym(synonym1.to_view_json());
|
||||
nlohmann::json foo = R"({"id": "syn-1", "root": "lululemon", "synonyms": ["lulu lemon"]})"_json;
|
||||
|
||||
coll1->add_synonym(R"({"id": "syn-1", "root": "lululemon", "synonyms": ["lulu lemon"]})"_json);
|
||||
|
||||
auto res = coll1->search("lululemon", {"title"}, "", {}, {}, {2}, 10, 1, FREQUENCY, {true}, 0).get();
|
||||
ASSERT_EQ(2, res["hits"].size());
|
||||
@ -832,9 +830,7 @@ TEST_F(CollectionSynonymsTest, SynonymExpansionAndCompressionRanking) {
|
||||
ASSERT_EQ(res["hits"][0]["text_match"].get<size_t>(), res["hits"][1]["text_match"].get<size_t>());
|
||||
|
||||
// now with compression synonym
|
||||
synonym1.root = {"lulu", "lemon"};
|
||||
synonym1.synonyms = {{"lululemon"}};
|
||||
coll1->add_synonym(synonym1.to_view_json());
|
||||
coll1->add_synonym(R"({"id": "syn-1", "root": "lulu lemon", "synonyms": ["lululemon"]})"_json);
|
||||
|
||||
res = coll1->search("lulu lemon", {"title"}, "", {}, {}, {2}, 10, 1, FREQUENCY, {true}, 0).get();
|
||||
ASSERT_EQ(2, res["hits"].size());
|
||||
@ -876,7 +872,7 @@ TEST_F(CollectionSynonymsTest, SynonymQueriesMustHavePrefixEnabled) {
|
||||
}
|
||||
|
||||
synonym_t synonym1{"syn-1", {"ns"}, {{"nonstick"}}};
|
||||
coll1->add_synonym(synonym1.to_view_json());
|
||||
coll1->add_synonym(R"({"id": "syn-1", "root": "ns", "synonyms": ["nonstick"]})"_json);
|
||||
|
||||
auto res = coll1->search("ns cook", {"title"}, "", {}, {}, {2}, 10, 1, FREQUENCY, {true}, 0).get();
|
||||
ASSERT_EQ(1, res["hits"].size());
|
||||
|
@ -354,6 +354,4 @@ TEST(TokenizerTest, ShouldTokenizeWithDifferentSymbolConfigs) {
|
||||
ASSERT_EQ("ความ", tokens[0]);
|
||||
ASSERT_EQ("เหลื่อม", tokens[1]);
|
||||
ASSERT_EQ("ล้ํา", tokens[2]);
|
||||
|
||||
LOG(INFO) << "here";
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user