diff --git a/src/collection.cpp b/src/collection.cpp
index 98b9f6bd..ab516ce2 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1374,7 +1374,7 @@ void Collection::parse_search_query(const std::string &query, std::vector
         std::vector<char> custom_symbols = symbols_to_index;
         custom_symbols.push_back('-');
 
-        Tokenizer(query, true, false, locale, custom_symbols).tokenize(tokens);
+        Tokenizer(query, true, false, locale, custom_symbols, token_separators).tokenize(tokens);
     }
 
     bool exclude_operator_prior = false;
diff --git a/test/collection_specific_test.cpp b/test/collection_specific_test.cpp
index 1691cf7f..f80d46c6 100644
--- a/test/collection_specific_test.cpp
+++ b/test/collection_specific_test.cpp
@@ -975,13 +975,26 @@ TEST_F(CollectionSpecificTest, CustomSeparators) {
                                  1, spp::sparse_hash_set<std::string>(),
                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
                                  "", "",{}, 1000,
-                                 true, false, true, "", 1000).get();
+                                 true, false, true, "", true).get();
 
     ASSERT_EQ(1, results["hits"].size());
     ASSERT_EQ(1, results["hits"][0]["highlights"].size());
     ASSERT_EQ("name", results["hits"][0]["highlights"][0]["field"]);
     ASSERT_EQ("alpha-beta-gamma-omega-zeta", results["hits"][0]["highlights"][0]["snippet"]);
 
+    results = coll1->search("gamma-omega", {"name"},
+                            "", {}, {}, {0}, 10,
+                            1, FREQUENCY, {false},
+                            1, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
+                            "", "",{}, 1000,
+                            true, false, true, "", false).get();
+
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ(1, results["hits"][0]["highlights"].size());
+    ASSERT_EQ("name", results["hits"][0]["highlights"][0]["field"]);
+    ASSERT_EQ("alpha-beta-gamma-omega-zeta", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
     // ensure that symbols are validated
 
     nlohmann::json coll_def;
@@ -1038,7 +1051,7 @@ TEST_F(CollectionSpecificTest, CustomSymbolsForIndexing) {
                                  1, spp::sparse_hash_set<std::string>(),
                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
                                  "", "",{}, 1000,
-                                 true, false, true, "", 1000).get();
+                                 true, false, true, "", false).get();
 
     ASSERT_EQ(1, results["hits"].size());
     ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
@@ -1059,7 +1072,7 @@ TEST_F(CollectionSpecificTest, CustomSymbolsForIndexing) {
                                  1, spp::sparse_hash_set<std::string>(),
                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
                                  "", "",{}, 1000,
-                                 true, false, true, "", 1000).get();
+                                 true, false, true, "", false).get();
 
     ASSERT_EQ(2, results["hits"].size());
     ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
@@ -1094,6 +1107,108 @@ TEST_F(CollectionSpecificTest, CustomSymbolsForIndexing) {
     collectionManager.drop_collection("coll1");
 }
 
+TEST_F(CollectionSpecificTest, CustomSeparatorsHandleQueryVariations) {
+    std::vector<field> fields = {field("name", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    Collection* coll1 = collectionManager.create_collection(
+        "coll1", 1, fields, "points", 0, "", {}, {"-", ".", "*", "&", "/"}
+    ).get();
+
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["name"] = "1&1 Internet Limited";
+    doc1["points"] = 100;
+
+    nlohmann::json doc2;
+    doc2["id"] = "1";
+    doc2["name"] = "bofrost*dienstl";
+    doc2["points"] = 100;
+
+    nlohmann::json doc3;
+    doc3["id"] = "2";
+    doc3["name"] = "just...grilled";
+    doc3["points"] = 100;
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc3.dump()).ok());
+
+    auto results = coll1->search("bofrost*dienstl", {"name"},
+                                 "", {}, {}, {0}, 10,
+                                 1, FREQUENCY, {false},
+                                 1, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
+                                 "", "",{}, 1000,
+                                 true, false, true, "", false).get();
+
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
+    ASSERT_EQ("bofrost*dienstl", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    results = coll1->search("bofrost * dienstl", {"name"},
+                            "", {}, {}, {0}, 10,
+                            1, FREQUENCY, {false},
+                            1, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
+                            "", "",{}, 1000,
+                            true, false, true, "", false).get();
+
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
+    ASSERT_EQ("bofrost*dienstl", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    results = coll1->search("bofrost dienstl", {"name"},
+                            "", {}, {}, {0}, 10,
+                            1, FREQUENCY, {false},
+                            1, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
+                            "", "",{}, 1000,
+                            true, false, true, "", false).get();
+
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
+    ASSERT_EQ("bofrost*dienstl", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    results = coll1->search("1&1", {"name"},
+                            "", {}, {}, {0}, 10,
+                            1, FREQUENCY, {false},
+                            1, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
+                            "", "",{}, 1000,
+                            true, false, true, "", false).get();
+
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
+    ASSERT_EQ("1&1 Internet Limited", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    results = coll1->search("1 & 1", {"name"},
+                            "", {}, {}, {0}, 10,
+                            1, FREQUENCY, {false},
+                            1, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
+                            "", "",{}, 1000,
+                            true, false, true, "", false).get();
+
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
+    ASSERT_EQ("1&1 Internet Limited", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    results = coll1->search("just grilled", {"name"},
+                            "", {}, {}, {0}, 10,
+                            1, FREQUENCY, {false},
+                            1, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
+                            "", "",{}, 1000,
+                            true, false, true, "", false).get();
+
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ("2", results["hits"][0]["document"]["id"].get<std::string>());
+    ASSERT_EQ("just...grilled", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    collectionManager.drop_collection("coll1");
+}
+
 TEST_F(CollectionSpecificTest, TypoCorrectionWithFaceting) {
     std::vector<field> fields = {field("name", field_types::STRING, false),
                                  field("brand", field_types::STRING, true),
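The collection.cpp hunk above passes the collection's configured token_separators into the query-side Tokenizer, so queries are split on the same separator characters that are applied when documents are indexed; the new CustomSeparatorsHandleQueryVariations test asserts that "bofrost*dienstl", "bofrost * dienstl" and "bofrost dienstl" all resolve to the same record. The snippet below is a minimal standalone sketch of that splitting behaviour under the assumption that separator characters are simply treated like whitespace on both sides; it is not the actual Typesense Tokenizer (which also handles locales and symbols_to_index), and the tokenize helper is purely illustrative.

#include <cctype>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

// Split `text` on whitespace and on any character registered as a token separator.
std::vector<std::string> tokenize(const std::string& text,
                                  const std::unordered_set<char>& separators) {
    std::vector<std::string> tokens;
    std::string current;
    for(char c : text) {
        bool is_break = std::isspace(static_cast<unsigned char>(c)) || separators.count(c) != 0;
        if(is_break) {
            if(!current.empty()) {
                tokens.push_back(current);
                current.clear();
            }
        } else {
            current += static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        }
    }
    if(!current.empty()) {
        tokens.push_back(current);
    }
    return tokens;
}

int main() {
    // Same separator set as in the CustomSeparatorsHandleQueryVariations test.
    const std::unordered_set<char> separators = {'-', '.', '*', '&', '/'};

    // The stored value and the query variations from the test all reduce to the
    // same token sequence ("bofrost", "dienstl"), which is why they are expected
    // to match the same document.
    for(const std::string& variant : {"bofrost*dienstl", "bofrost * dienstl", "bofrost dienstl"}) {
        for(const std::string& token : tokenize(variant, separators)) {
            std::cout << token << ' ';
        }
        std::cout << '\n';
    }
    return 0;
}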