mirror of
https://github.com/typesense/typesense.git
synced 2025-05-18 12:42:50 +08:00
Use all candidates of a given num_typo value.
Typo tokens threshold should not trigger when we have explored only some of the candidates of a given num_typo value.
This commit is contained in:
parent
bfb122bfec
commit
0ae718d067
@ -910,6 +910,8 @@ void Index::search_candidates(const uint8_t & field_id,
|
||||
auto product = []( long long a, token_candidates & b ) { return a*b.candidates.size(); };
|
||||
long long int N = std::accumulate(token_candidates_vec.begin(), token_candidates_vec.end(), 1LL, product);
|
||||
|
||||
size_t last_cost = 0;
|
||||
|
||||
for(long long n=0; n<N && n<combination_limit; ++n) {
|
||||
// every element in `query_suggestion` contains a token and its associated hits
|
||||
std::vector<art_leaf*> query_suggestion(token_candidates_vec.size());
|
||||
@ -921,6 +923,13 @@ void Index::search_candidates(const uint8_t & field_id,
|
||||
uint32_t total_cost = next_suggestion(token_candidates_vec, n, actual_query_suggestion,
|
||||
query_suggestion, token_bits);
|
||||
|
||||
//LOG(INFO) << "field_num_results: " << field_num_results << ", typo_tokens_threshold: " << typo_tokens_threshold;
|
||||
if(total_cost != last_cost && field_num_results >= typo_tokens_threshold) {
|
||||
//break;
|
||||
}
|
||||
|
||||
last_cost = total_cost;
|
||||
|
||||
/*LOG(INFO) << "n: " << n;
|
||||
for(size_t i=0; i < actual_query_suggestion.size(); i++) {
|
||||
LOG(INFO) << "i: " << i << " - " << actual_query_suggestion[i]->key << ", ids: "
|
||||
@ -1019,11 +1028,6 @@ void Index::search_candidates(const uint8_t & field_id,
|
||||
}
|
||||
|
||||
searched_queries.push_back(actual_query_suggestion);
|
||||
|
||||
//LOG(INFO) << "field_num_results: " << field_num_results << ", typo_tokens_threshold: " << typo_tokens_threshold;
|
||||
if(field_num_results >= typo_tokens_threshold) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -276,6 +276,39 @@ TEST_F(CollectionSpecificTest, MultiFieldArrayRepeatingTokens) {
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
// Indexes two documents -- "Yeshivah Gedolah High School" (points = 100) and
// "GED" (points = 50) -- then searches the `title` field for "ged".
// Expects both documents to be returned, with the document whose title is
// exactly "GED" (id "1") listed ahead of the longer title (id "0"), even
// though its `points` value is the lower of the two.
TEST_F(CollectionSpecificTest, ExactMatchOnPrefix) {
|
||||
std::vector<field> fields = {field("title", field_types::STRING, false),
|
||||
field("points", field_types::INT32, false),};
|
||||
|
||||
// NOTE(review): the trailing "points" argument is presumably the default sorting field -- confirm.
Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
|
||||
|
||||
nlohmann::json doc1;
|
||||
doc1["id"] = "0";
|
||||
doc1["title"] = "Yeshivah Gedolah High School";
|
||||
doc1["points"] = 100;
|
||||
|
||||
nlohmann::json doc2;
|
||||
doc2["id"] = "1";
|
||||
doc2["title"] = "GED";
|
||||
doc2["points"] = 50;
|
||||
|
||||
ASSERT_TRUE(coll1->add(doc1.dump()).ok());
|
||||
ASSERT_TRUE(coll1->add(doc2.dump()).ok());
|
||||
|
||||
// NOTE(review): the search arguments are positional and their meaning is not visible here;
// `{2}` is presumably the allowed typo count and `{true}` presumably enables prefix
// matching (per the test name) -- confirm against the Collection::search signature.
auto results = coll1->search("ged", {"title"}, "", {}, {}, {2}, 10,
|
||||
1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
|
||||
"", 1).get();
|
||||
|
||||
// Both documents must be returned.
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
// The document titled exactly "GED" must come first, followed by the longer title.
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
|
||||
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
TEST_F(CollectionSpecificTest, PrefixWithTypos) {
|
||||
std::vector<field> fields = {field("title", field_types::STRING, false),
|
||||
field("points", field_types::INT32, false),};
|
||||
|
Loading…
x
Reference in New Issue
Block a user