mirror of
https://github.com/typesense/typesense.git
synced 2025-05-18 20:52:50 +08:00
Typo and drop tokens thresholds must be applied independently.
This commit is contained in:
parent
0ae718d067
commit
672c895805
@ -1984,6 +1984,12 @@ void Index::search_field(const uint8_t & field_id,
|
||||
// when no more costs are left for this token
|
||||
if(token_to_costs[token_index].empty()) {
|
||||
// we can try to drop the token and search with remaining tokens
|
||||
|
||||
if(field_num_results >= drop_tokens_threshold) {
|
||||
// but if drop_tokens_threshold is breached, we are done
|
||||
return ;
|
||||
}
|
||||
|
||||
token_to_costs.erase(token_to_costs.begin()+token_index);
|
||||
search_tokens.erase(search_tokens.begin()+token_index);
|
||||
query_tokens.erase(query_tokens.begin()+token_index);
|
||||
@ -2010,8 +2016,8 @@ void Index::search_field(const uint8_t & field_id,
|
||||
|
||||
resume_typo_loop:
|
||||
|
||||
if(field_num_results >= drop_tokens_threshold || field_num_results >= typo_tokens_threshold) {
|
||||
// if either threshold is breached, we are done
|
||||
if(field_num_results >= typo_tokens_threshold) {
|
||||
// if typo threshold is breached, we are done
|
||||
return ;
|
||||
}
|
||||
|
||||
@ -2022,6 +2028,11 @@ void Index::search_field(const uint8_t & field_id,
|
||||
if(!query_tokens.empty() && num_tokens_dropped < query_tokens.size()) {
|
||||
// Drop tokens from right until (len/2 + 1), and then from left until (len/2 + 1)
|
||||
|
||||
if(field_num_results >= drop_tokens_threshold) {
|
||||
// if drop_tokens_threshold is breached, we are done
|
||||
return ;
|
||||
}
|
||||
|
||||
std::vector<token_t> truncated_tokens;
|
||||
num_tokens_dropped++;
|
||||
|
||||
|
@ -309,6 +309,31 @@ TEST_F(CollectionSpecificTest, ExactMatchOnPrefix) {
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
// Indexes a single document and searches it with "SG25026H", a query that is
// one character short of the indexed token "SG25026HP". The test passes only
// if exactly that one document (id "0") is returned.
TEST_F(CollectionSpecificTest, TypoPrefixSearchWithoutPrefixEnabled) {
    const std::string coll_name = "coll1";

    // Schema: a string field to search on and an int32 field that is passed
    // to create_collection as its last argument.
    std::vector<field> schema_fields = {
        field("title", field_types::STRING, false),
        field("points", field_types::INT32, false),
    };

    Collection* coll = collectionManager.create_collection(coll_name, 1, schema_fields, "points").get();

    // The only document in the collection.
    nlohmann::json doc;
    doc["id"] = "0";
    doc["title"] = "Cisco SG25026HP Gigabit Smart Switch";
    doc["points"] = 100;

    const std::string serialized_doc = doc.dump();
    ASSERT_TRUE(coll->add(serialized_doc).ok());

    // NOTE(review): {2} and {false} are presumably the typo budget and the
    // prefix flag (going by the test name) -- confirm against the declaration
    // of Collection::search, which is not visible here.
    auto results = coll->search(
        "SG25026H", {"title"}, "", {}, {}, {2}, 10,
        1, FREQUENCY, {false}, 0,
        spp::sparse_hash_set<std::string>(),
        spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
        "", 1
    ).get();

    // Exactly one hit, and it must be the document inserted above.
    auto& hits = results["hits"];
    ASSERT_EQ(1, hits.size());
    ASSERT_EQ("0", hits[0]["document"]["id"].get<std::string>());

    collectionManager.drop_collection(coll_name);
}
|
||||
|
||||
TEST_F(CollectionSpecificTest, PrefixWithTypos) {
|
||||
std::vector<field> fields = {field("title", field_types::STRING, false),
|
||||
field("points", field_types::INT32, false),};
|
||||
|
@ -292,7 +292,7 @@ TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
|
||||
}
|
||||
|
||||
results.clear();
|
||||
results = collection->search("the a DoesNotExist", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}, 0).get();
|
||||
results = collection->search("the a insurance", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}, 0).get();
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
// with no indexed word
|
||||
|
Loading…
x
Reference in New Issue
Block a user