mirror of
https://github.com/typesense/typesense.git
synced 2025-05-19 21:22:25 +08:00
Fixed an issue with prefix searching.
This commit is contained in:
parent
aa1cd0acd1
commit
51f57d3dd7
@ -1318,10 +1318,10 @@ static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *
|
||||
|
||||
Also, for prefix searches we don't compare with full leaf key.
|
||||
*/
|
||||
const int end_index = prefix ? min(l->key_len, term_len) : l->key_len;
|
||||
const int iter_len = prefix ? min(l->key_len - 1, term_len) : l->key_len;
|
||||
|
||||
// If at any point, `temp_cost > 2*max_cost` we can terminate immediately as we can never recover from that
|
||||
while(depth < end_index && temp_cost <= 2*max_cost) {
|
||||
while(depth < iter_len && temp_cost <= 2 * max_cost) {
|
||||
c = l->key[depth];
|
||||
temp_cost = levenshtein_dist(depth, p, c, term, term_len, rows[i], rows[j], rows[k]);
|
||||
printf("leaf char: %c\n", l->key[depth]);
|
||||
@ -1338,12 +1338,12 @@ static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *
|
||||
|
||||
int final_cost = rows[j][columns-1];
|
||||
|
||||
if(prefix && term_len < (int) l->key_len && temp_cost >= min_cost && temp_cost <= max_cost) {
|
||||
if(prefix && term_len < (int) l->key_len - 1 && temp_cost >= min_cost && temp_cost <= max_cost) {
|
||||
results.push_back(n);
|
||||
return;
|
||||
}
|
||||
|
||||
if(prefix && term_len >= (int) l->key_len && final_cost >= min_cost && final_cost <= max_cost) {
|
||||
if(prefix && term_len >= (int) l->key_len - 1 && final_cost >= min_cost && final_cost <= max_cost) {
|
||||
results.push_back(n);
|
||||
return;
|
||||
}
|
||||
|
@ -718,8 +718,9 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
|
||||
|
||||
std::vector<art_leaf *> leaves;
|
||||
|
||||
const size_t q_len = prefix_search ? q.length() : q.length() + 1;
|
||||
art_fuzzy_search(t, (const unsigned char *) q.c_str(),
|
||||
q.size(), 0, bounded_cost, 10000,
|
||||
q_len, 0, bounded_cost, 10000,
|
||||
token_ordering::MAX_SCORE, prefix_search, nullptr, 0, leaves);
|
||||
|
||||
for (size_t leaf_index = 0; leaf_index < leaves.size(); leaf_index++) {
|
||||
@ -1416,7 +1417,7 @@ void Index::collate_included_ids(const std::vector<std::string>& q_included_toke
|
||||
std::vector<art_leaf *> override_query;
|
||||
|
||||
for(const std::string& token: q_included_tokens) {
|
||||
const size_t token_len = token.length();
|
||||
const size_t token_len = token.size() + 1;
|
||||
|
||||
std::vector<art_leaf*> leaves;
|
||||
art_fuzzy_search(search_index.at(field), (const unsigned char *) token.c_str(), token_len,
|
||||
|
@ -623,7 +623,7 @@ TEST(ArtTest, test_art_fuzzy_search_single_leaf_prefix) {
|
||||
EXPECT_EQ(1, l->values->ids.at(0));
|
||||
|
||||
std::vector<art_leaf*> leaves;
|
||||
art_fuzzy_search(&t, (const unsigned char *) "aplication", strlen(key), 0, 1, 10, FREQUENCY, true, nullptr, 0, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "aplication", strlen(key)-1, 0, 1, 10, FREQUENCY, true, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
|
||||
res = art_tree_destroy(&t);
|
||||
@ -783,7 +783,7 @@ TEST(ArtTest, test_art_search_sku_like_tokens) {
|
||||
|
||||
for (const auto &key : keys) {
|
||||
std::vector<art_leaf *> leaves;
|
||||
art_fuzzy_search(&t, (const unsigned char*)key.c_str(), key.size()+1, 0, 0, 10,
|
||||
art_fuzzy_search(&t, (const unsigned char*)key.c_str(), key.size(), 0, 0, 10,
|
||||
FREQUENCY, true, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ(key.c_str(), (const char *) leaves.at(0)->key);
|
||||
@ -822,6 +822,19 @@ TEST(ArtTest, test_art_search_ill_like_tokens) {
|
||||
line++;
|
||||
}
|
||||
|
||||
std::map<std::string, size_t> key_to_count {
|
||||
std::make_pair("input", 2),
|
||||
std::make_pair("image", 7),
|
||||
std::make_pair("instrument", 2),
|
||||
std::make_pair("in", 10),
|
||||
std::make_pair("info", 2),
|
||||
std::make_pair("inventor", 2),
|
||||
std::make_pair("imageresize", 2),
|
||||
std::make_pair("id", 5),
|
||||
std::make_pair("insect", 2),
|
||||
std::make_pair("ice", 2),
|
||||
};
|
||||
|
||||
for (const auto &key : keys) {
|
||||
//LOG(INFO) << "Searching for " << key;
|
||||
art_leaf* l = (art_leaf *) art_search(&t, (const unsigned char *)key.c_str(), key.size()+1);
|
||||
@ -829,11 +842,15 @@ TEST(ArtTest, test_art_search_ill_like_tokens) {
|
||||
EXPECT_EQ(1, l->values->ids.getLength());
|
||||
|
||||
std::vector<art_leaf *> leaves;
|
||||
art_fuzzy_search(&t, (const unsigned char*)key.c_str(), key.size()+1, 0, 0, 10,
|
||||
art_fuzzy_search(&t, (const unsigned char*)key.c_str(), key.size(), 0, 0, 10,
|
||||
FREQUENCY, true, nullptr, 0, leaves);
|
||||
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ(key.c_str(), (const char *) leaves.at(0)->key);
|
||||
if(key_to_count.count(key) != 0) {
|
||||
ASSERT_EQ(key_to_count[key], leaves.size());
|
||||
} else {
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ(key.c_str(), (const char *) leaves.at(0)->key);
|
||||
}
|
||||
|
||||
leaves.clear();
|
||||
|
||||
@ -872,7 +889,7 @@ TEST(ArtTest, test_art_search_ill_like_tokens2) {
|
||||
EXPECT_EQ(1, l->values->ids.getLength());
|
||||
|
||||
std::vector<art_leaf *> leaves;
|
||||
art_fuzzy_search(&t, (const unsigned char*)key.c_str(), key.size()+1, 0, 0, 10,
|
||||
art_fuzzy_search(&t, (const unsigned char*)key.c_str(), key.size(), 0, 0, 10,
|
||||
FREQUENCY, true, nullptr, 0, leaves);
|
||||
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
@ -891,6 +908,33 @@ TEST(ArtTest, test_art_search_ill_like_tokens2) {
|
||||
ASSERT_TRUE(res == 0);
|
||||
}
|
||||
|
||||
// Checks fuzzy search against a tree that holds the single key "roche":
//   1. a prefix search for the unrelated term "chews" must return no leaves;
//   2. a non-prefix search for "roche" itself must return exactly one leaf.
TEST(ArtTest, test_art_search_roche_chews) {
    art_tree t;
    const int init_rc = art_tree_init(&t);
    ASSERT_TRUE(init_rc == 0);

    const std::string indexed_key = "roche";
    art_document doc = get_document((uint32_t) 1);

    // The key is inserted with length size()+1, i.e. including its terminating NUL byte.
    ASSERT_TRUE(NULL == art_insert(&t, (unsigned char *) indexed_key.c_str(), indexed_key.size()+1, &doc, 1));

    std::vector<art_leaf *> leaves;

    // Prefix search (flag = true): the term length excludes the trailing NUL.
    const std::string term = "chews";
    art_fuzzy_search(&t, (const unsigned char*)term.c_str(), term.size(), 0, 2, 10,
                     FREQUENCY, true, nullptr, 0, leaves);
    ASSERT_EQ(0, leaves.size());

    // Non-prefix search (flag = false): the term length includes the trailing NUL,
    // mirroring the length used at insertion time. `leaves` is still empty here.
    art_fuzzy_search(&t, (const unsigned char*)indexed_key.c_str(), indexed_key.size() + 1, 0, 0, 10,
                     FREQUENCY, false, nullptr, 0, leaves);
    ASSERT_EQ(1, leaves.size());

    const int destroy_rc = art_tree_destroy(&t);
    ASSERT_TRUE(destroy_rc == 0);
}
|
||||
|
||||
TEST(ArtTest, test_encode_int32) {
|
||||
unsigned char chars[8];
|
||||
|
||||
|
@ -175,7 +175,7 @@ TEST_F(CollectionFacetingTest, FacetCounts) {
|
||||
results = coll_array_fields->search("*", query_fields, "", facets, sort_fields, 0, 10, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "tags: fine pltinum").get();
|
||||
spp::sparse_hash_set<std::string>(), 10, "tags: fxne aluminium").get();
|
||||
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
|
Loading…
x
Reference in New Issue
Block a user