mirror of
https://github.com/typesense/typesense.git
synced 2025-05-20 21:52:23 +08:00
Use token separators whule parsing search query as well.
This commit is contained in:
parent
9659d60047
commit
adc816e662
@ -1374,7 +1374,7 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s
|
||||
std::vector<char> custom_symbols = symbols_to_index;
|
||||
custom_symbols.push_back('-');
|
||||
|
||||
Tokenizer(query, true, false, locale, custom_symbols).tokenize(tokens);
|
||||
Tokenizer(query, true, false, locale, custom_symbols, token_separators).tokenize(tokens);
|
||||
}
|
||||
|
||||
bool exclude_operator_prior = false;
|
||||
|
@ -975,13 +975,26 @@ TEST_F(CollectionSpecificTest, CustomSeparators) {
|
||||
1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>",{}, 1000,
|
||||
true, false, true, "", 1000).get();
|
||||
true, false, true, "", true).get();
|
||||
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
ASSERT_EQ(1, results["hits"][0]["highlights"].size());
|
||||
ASSERT_EQ("name", results["hits"][0]["highlights"][0]["field"]);
|
||||
ASSERT_EQ("alpha-beta-<mark>gamma</mark>-omega-zeta", results["hits"][0]["highlights"][0]["snippet"]);
|
||||
|
||||
results = coll1->search("gamma-omega", {"name"},
|
||||
"", {}, {}, {0}, 10,
|
||||
1, FREQUENCY, {false},
|
||||
1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>",{}, 1000,
|
||||
true, false, true, "", false).get();
|
||||
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
ASSERT_EQ(1, results["hits"][0]["highlights"].size());
|
||||
ASSERT_EQ("name", results["hits"][0]["highlights"][0]["field"]);
|
||||
ASSERT_EQ("alpha-beta-<mark>gamma</mark>-<mark>omega</mark>-zeta", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
|
||||
|
||||
// ensure that symbols are validated
|
||||
|
||||
nlohmann::json coll_def;
|
||||
@ -1038,7 +1051,7 @@ TEST_F(CollectionSpecificTest, CustomSymbolsForIndexing) {
|
||||
1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>",{}, 1000,
|
||||
true, false, true, "", 1000).get();
|
||||
true, false, true, "", false).get();
|
||||
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
@ -1059,7 +1072,7 @@ TEST_F(CollectionSpecificTest, CustomSymbolsForIndexing) {
|
||||
1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>",{}, 1000,
|
||||
true, false, true, "", 1000).get();
|
||||
true, false, true, "", false).get();
|
||||
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
@ -1094,6 +1107,108 @@ TEST_F(CollectionSpecificTest, CustomSymbolsForIndexing) {
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
TEST_F(CollectionSpecificTest, CustomSeparatorsHandleQueryVariations) {
|
||||
std::vector<field> fields = {field("name", field_types::STRING, false),
|
||||
field("points", field_types::INT32, false),};
|
||||
|
||||
Collection* coll1 = collectionManager.create_collection(
|
||||
"coll1", 1, fields, "points", 0, "", {}, {"-", ".", "*", "&", "/"}
|
||||
).get();
|
||||
|
||||
nlohmann::json doc1;
|
||||
doc1["id"] = "0";
|
||||
doc1["name"] = "1&1 Internet Limited";
|
||||
doc1["points"] = 100;
|
||||
|
||||
nlohmann::json doc2;
|
||||
doc2["id"] = "1";
|
||||
doc2["name"] = "bofrost*dienstl";
|
||||
doc2["points"] = 100;
|
||||
|
||||
nlohmann::json doc3;
|
||||
doc3["id"] = "2";
|
||||
doc3["name"] = "just...grilled";
|
||||
doc3["points"] = 100;
|
||||
|
||||
ASSERT_TRUE(coll1->add(doc1.dump()).ok());
|
||||
ASSERT_TRUE(coll1->add(doc2.dump()).ok());
|
||||
ASSERT_TRUE(coll1->add(doc3.dump()).ok());
|
||||
|
||||
auto results = coll1->search("bofrost*dienstl", {"name"},
|
||||
"", {}, {}, {0}, 10,
|
||||
1, FREQUENCY, {false},
|
||||
1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>",{}, 1000,
|
||||
true, false, true, "", false).get();
|
||||
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("<mark>bofrost</mark>*<mark>dienstl</mark>", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
|
||||
|
||||
results = coll1->search("bofrost * dienstl", {"name"},
|
||||
"", {}, {}, {0}, 10,
|
||||
1, FREQUENCY, {false},
|
||||
1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>",{}, 1000,
|
||||
true, false, true, "", false).get();
|
||||
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("<mark>bofrost</mark>*<mark>dienstl</mark>", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
|
||||
|
||||
results = coll1->search("bofrost dienstl", {"name"},
|
||||
"", {}, {}, {0}, 10,
|
||||
1, FREQUENCY, {false},
|
||||
1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>",{}, 1000,
|
||||
true, false, true, "", false).get();
|
||||
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("<mark>bofrost</mark>*<mark>dienstl</mark>", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
|
||||
|
||||
results = coll1->search("1&1", {"name"},
|
||||
"", {}, {}, {0}, 10,
|
||||
1, FREQUENCY, {false},
|
||||
1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>",{}, 1000,
|
||||
true, false, true, "", false).get();
|
||||
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("<mark>1</mark>&<mark>1</mark> Internet Limited", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
|
||||
|
||||
results = coll1->search("1 & 1", {"name"},
|
||||
"", {}, {}, {0}, 10,
|
||||
1, FREQUENCY, {false},
|
||||
1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>",{}, 1000,
|
||||
true, false, true, "", false).get();
|
||||
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("<mark>1</mark>&<mark>1</mark> Internet Limited", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
|
||||
|
||||
results = coll1->search("just grilled", {"name"},
|
||||
"", {}, {}, {0}, 10,
|
||||
1, FREQUENCY, {false},
|
||||
1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>",{}, 1000,
|
||||
true, false, true, "", false).get();
|
||||
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
ASSERT_EQ("2", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("<mark>just</mark>...<mark>grilled</mark>", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
|
||||
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
TEST_F(CollectionSpecificTest, TypoCorrectionWithFaceting) {
|
||||
std::vector<field> fields = {field("name", field_types::STRING, false),
|
||||
field("brand", field_types::STRING, true),
|
||||
|
Loading…
x
Reference in New Issue
Block a user