From d71110f3979eacd35d623aab10982343587dbe71 Mon Sep 17 00:00:00 2001 From: krunal Date: Tue, 19 Sep 2023 21:41:12 +0530 Subject: [PATCH 1/3] adding test for multiple synonym substitution in query --- test/collection_synonyms_test.cpp | 59 +++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/collection_synonyms_test.cpp b/test/collection_synonyms_test.cpp index c06c7062..6a0b9707 100644 --- a/test/collection_synonyms_test.cpp +++ b/test/collection_synonyms_test.cpp @@ -1008,3 +1008,62 @@ TEST_F(CollectionSynonymsTest, SynonymForKorean) { res = coll1->search("구울", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, 0).get(); ASSERT_EQ(3, res["hits"].size()); } + +TEST_F(CollectionSynonymsTest, MultipleSynonymSubstitution) { + nlohmann::json schema = R"({ + "name": "coll2", + "fields": [ + {"name": "title", "type": "string"}, + {"name": "gender", "type": "string"} + ] + })"_json; + + auto op = collectionManager.create_collection(schema); + ASSERT_TRUE(op.ok()); + Collection* coll2 = op.get(); + + std::vector> records = { + {"Beautiful Blazer", "Male"}, + }; + + for(size_t i=0; iadd(doc.dump()); + ASSERT_TRUE(add_op.ok()); + } + + nlohmann::json synonym1 = R"({ + "id": "foobar", + "synonyms": ["blazer", "suit"] + })"_json; + + nlohmann::json synonym2 = R"({ + "id": "foobar2", + "synonyms": ["male", "man"] + })"_json; + + + ASSERT_TRUE(coll2->add_synonym(synonym1).ok()); + ASSERT_TRUE(coll2->add_synonym(synonym2).ok()); + + auto res = coll2->search("blazer male", {"title", "gender"}, "", {}, + {}, {0}, 10, 1, FREQUENCY, {true},0).get(); + ASSERT_EQ(1, res["hits"].size()); + + res = coll2->search("blazer man", {"title", "gender"}, "", {}, + {}, {0}, 10, 1, FREQUENCY, {true}, 0).get(); + ASSERT_EQ(1, res["hits"].size()); + + res = coll2->search("suit male", {"title", "gender"}, "", {}, + {}, {0}, 10, 1, FREQUENCY, {true}, 0).get(); + ASSERT_EQ(1, res["hits"].size()); + + res = coll2->search("suit man", {"title", "gender"}, "", {}, + {}, 
{0}, 10, 1, FREQUENCY, {true}, 0).get(); + ASSERT_EQ(1, res["hits"].size()); +} \ No newline at end of file From 7144b2f67dcc427623f7863bca01194c1b52ab09 Mon Sep 17 00:00:00 2001 From: krunal Date: Thu, 21 Sep 2023 15:27:34 +0530 Subject: [PATCH 2/3] fixing when multiple synonyms are substituted in query --- include/synonym_index.h | 3 ++- src/synonym_index.cpp | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/include/synonym_index.h b/include/synonym_index.h index 55d4dd24..5035a3f9 100644 --- a/include/synonym_index.h +++ b/include/synonym_index.h @@ -56,7 +56,8 @@ private: size_t start_window_size, size_t start_index_pos, std::set& processed_syn_hashes, - std::vector>& results) const; + std::vector>& results, + const std::vector& orig_tokens) const; public: diff --git a/src/synonym_index.cpp b/src/synonym_index.cpp index 676d5816..bc9595cd 100644 --- a/src/synonym_index.cpp +++ b/src/synonym_index.cpp @@ -4,7 +4,8 @@ void SynonymIndex::synonym_reduction_internal(const std::vector& tokens, size_t start_window_size, size_t start_index_pos, std::set& processed_syn_hashes, - std::vector>& results) const { + std::vector>& results, + const std::vector& orig_tokens) const { bool recursed = false; @@ -59,10 +60,11 @@ void SynonymIndex::synonym_reduction_internal(const std::vector& to syn_def_hashes.push_back(token_hash); } - if (syn_def_hash == syn_hash) { - // skip over token matching itself in the group - continue; - } +// if (syn_def_hash == syn_hash) { +// // skip over token matching itself in the group +// LOG(INFO) << "skipping"; +// continue; +// } for (size_t i = start_index + window_len; i < tokens.size(); i++) { new_tokens.push_back(tokens[i]); @@ -80,7 +82,8 @@ void SynonymIndex::synonym_reduction_internal(const std::vector& to } recursed = true; - synonym_reduction_internal(new_tokens, window_len, start_index, processed_syn_hashes, results); + synonym_reduction_internal(new_tokens, window_len, + start_index, 
processed_syn_hashes, results, orig_tokens); } } } @@ -90,7 +93,7 @@ void SynonymIndex::synonym_reduction_internal(const std::vector& to start_index_pos = 0; } - if(!recursed && !processed_syn_hashes.empty()) { + if(!recursed && !processed_syn_hashes.empty() && tokens != orig_tokens) { results.emplace_back(tokens); } } @@ -102,7 +105,7 @@ void SynonymIndex::synonym_reduction(const std::vector& tokens, } std::set processed_syn_hashes; - synonym_reduction_internal(tokens, tokens.size(), 0, processed_syn_hashes, results); + synonym_reduction_internal(tokens, tokens.size(), 0, processed_syn_hashes, results, tokens); } Option SynonymIndex::add_synonym(const std::string & collection_name, const synonym_t& synonym, From 5dc27ce1521747608340d576ddf8bce27c04de73 Mon Sep 17 00:00:00 2001 From: krunal Date: Fri, 22 Sep 2023 11:56:13 +0530 Subject: [PATCH 3/3] removing commented code --- src/synonym_index.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/synonym_index.cpp b/src/synonym_index.cpp index bc9595cd..cfbde766 100644 --- a/src/synonym_index.cpp +++ b/src/synonym_index.cpp @@ -60,12 +60,6 @@ void SynonymIndex::synonym_reduction_internal(const std::vector& to syn_def_hashes.push_back(token_hash); } -// if (syn_def_hash == syn_hash) { -// // skip over token matching itself in the group -// LOG(INFO) << "skipping"; -// continue; -// } - for (size_t i = start_index + window_len; i < tokens.size(); i++) { new_tokens.push_back(tokens[i]); }