Mirror of https://github.com/typesense/typesense.git — last synced 2025-05-18 20:52:50 +08:00.
Exhaustive token searching with filter_ids
This commit is contained in:
parent
bcea70ebfd
commit
a08fb7738f
@ -65,6 +65,7 @@ FILE(GLOB SRC_FILES src/*.cpp)
|
||||
FILE(GLOB TEST_FILES test/*.cpp)
|
||||
|
||||
include_directories(include)
|
||||
include_directories(/usr/local/include)
|
||||
include_directories(${OPENSSL_INCLUDE_DIR})
|
||||
include_directories(${CURL_INCLUDE_DIR})
|
||||
include_directories(${ICU_INCLUDE_DIRS})
|
||||
@ -77,6 +78,7 @@ include_directories(${DEP_ROOT_DIR}/${BRPC_NAME}/include)
|
||||
include_directories(${DEP_ROOT_DIR}/${BRAFT_NAME}/include)
|
||||
include_directories(${DEP_ROOT_DIR}/${JEMALLOC_NAME}/include/jemalloc)
|
||||
|
||||
link_directories(/usr/local/lib)
|
||||
link_directories(${DEP_ROOT_DIR}/${GTEST_NAME}/googletest/build)
|
||||
link_directories(${DEP_ROOT_DIR}/${FOR_NAME})
|
||||
link_directories(${DEP_ROOT_DIR}/${H2O_NAME}/build)
|
||||
|
@ -245,7 +245,9 @@ int art_iter_prefix(art_tree *t, const unsigned char *prefix, int prefix_len, ar
|
||||
* Returns leaves that match a given string within a fuzzy distance of max_cost.
|
||||
*/
|
||||
int art_fuzzy_search(art_tree *t, const unsigned char *term, const int term_len, const int min_cost, const int max_cost,
|
||||
const int max_words, const token_ordering token_order, const bool prefix, std::vector<art_leaf *> &results);
|
||||
const int max_words, const token_ordering token_order, const bool prefix,
|
||||
const uint32_t *filter_ids, size_t filter_ids_length,
|
||||
std::vector<art_leaf *> &results);
|
||||
|
||||
int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_results,
|
||||
std::vector<art_leaf *> &results);
|
||||
|
@ -35,6 +35,10 @@ private:
|
||||
int low_index, int high_index, uint32_t base, uint32_t bits,
|
||||
uint32_t *indices);
|
||||
|
||||
void binary_count_indices(const uint32_t *values, int low_vindex, int high_vindex,
|
||||
int low_index, int high_index, uint32_t base, uint32_t bits,
|
||||
size_t& num_found);
|
||||
|
||||
public:
|
||||
|
||||
void load(const uint32_t *sorted_array, const uint32_t array_length);
|
||||
@ -45,7 +49,9 @@ public:
|
||||
|
||||
uint32_t indexOf(uint32_t value);
|
||||
|
||||
void indexOf(const uint32_t *values, const size_t values_len, uint32_t* indices);
|
||||
void indexOf(const uint32_t *values, size_t values_len, uint32_t* indices);
|
||||
|
||||
size_t numFoundOf(const uint32_t *values, const size_t values_len);
|
||||
|
||||
// returns false if malloc fails
|
||||
size_t append(uint32_t value);
|
||||
|
18
src/art.cpp
18
src/art.cpp
@ -904,7 +904,8 @@ void* art_delete(art_tree *t, const unsigned char *key, int key_len) {
|
||||
}*/
|
||||
|
||||
int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_results,
|
||||
std::vector<art_leaf *> &results) {
|
||||
const uint32_t* filter_ids, size_t filter_ids_length,
|
||||
std::vector<art_leaf *> &results) {
|
||||
printf("INSIDE art_topk_iter: root->type: %d\n", root->type);
|
||||
|
||||
std::priority_queue<const art_node *, std::vector<const art_node *>,
|
||||
@ -924,7 +925,17 @@ int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_r
|
||||
if (!n) continue;
|
||||
if (IS_LEAF(n)) {
|
||||
art_leaf *l = (art_leaf *) LEAF_RAW(n);
|
||||
results.push_back(l);
|
||||
|
||||
if(filter_ids_length == 0) {
|
||||
results.push_back(l);
|
||||
} else {
|
||||
// we will push leaf only if filter matches with leaf IDs
|
||||
size_t found_len = l->values->ids.numFoundOf(filter_ids, filter_ids_length);
|
||||
if(found_len != 0) {
|
||||
results.push_back(l);
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1383,6 +1394,7 @@ static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *
|
||||
*/
|
||||
int art_fuzzy_search(art_tree *t, const unsigned char *term, const int term_len, const int min_cost, const int max_cost,
|
||||
const int max_words, const token_ordering token_order, const bool prefix,
|
||||
const uint32_t *filter_ids, size_t filter_ids_length,
|
||||
std::vector<art_leaf *> &results) {
|
||||
|
||||
std::vector<const art_node*> nodes;
|
||||
@ -1412,7 +1424,7 @@ int art_fuzzy_search(art_tree *t, const unsigned char *term, const int term_len,
|
||||
//begin = std::chrono::high_resolution_clock::now();
|
||||
|
||||
for(auto node: nodes) {
|
||||
art_topk_iter(node, token_order, max_words, results);
|
||||
art_topk_iter(node, token_order, max_words, filter_ids, filter_ids_length, results);
|
||||
}
|
||||
|
||||
if(token_order == FREQUENCY) {
|
||||
|
@ -697,7 +697,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
|
||||
|
||||
art_fuzzy_search(t, (const unsigned char *) q.c_str(),
|
||||
q.size(), 0, bounded_cost, 10000,
|
||||
token_ordering::MAX_SCORE, prefix_search, leaves);
|
||||
token_ordering::MAX_SCORE, prefix_search, nullptr, 0, leaves);
|
||||
|
||||
for (size_t leaf_index = 0; leaf_index < leaves.size(); leaf_index++) {
|
||||
const auto &leaf = leaves[leaf_index];
|
||||
@ -1248,7 +1248,7 @@ void Index::collate_included_ids(const std::vector<std::string>& q_included_toke
|
||||
|
||||
std::vector<art_leaf*> leaves;
|
||||
art_fuzzy_search(search_index.at(field), (const unsigned char *) token.c_str(), token_len,
|
||||
0, 0, 1, token_ordering::MAX_SCORE, false, leaves);
|
||||
0, 0, 1, token_ordering::MAX_SCORE, false, nullptr, 0, leaves);
|
||||
|
||||
if(!leaves.empty()) {
|
||||
override_query.push_back(leaves[0]);
|
||||
@ -1550,7 +1550,7 @@ void Index::search(Option<uint32_t> & outcome,
|
||||
const bool prefix_search = prefix && (token_index == q_include_tokens.size()-1);
|
||||
const size_t token_len = prefix_search ? (int) token.length() : (int) token.length() + 1;
|
||||
art_fuzzy_search(search_index.at(field), (const unsigned char *) token.c_str(), token_len,
|
||||
0, 0, 1, token_order, prefix_search, leaves);
|
||||
0, 0, 1, token_order, prefix_search, nullptr, 0, leaves);
|
||||
|
||||
if(leaves.empty()) {
|
||||
continue;
|
||||
@ -1701,7 +1701,8 @@ void Index::search_field(const uint8_t & field_id,
|
||||
// If this is a prefix search, look for more candidates and do a union of those document IDs
|
||||
const int max_candidates = prefix_search ? 10 : 3;
|
||||
art_fuzzy_search(search_index.at(field), (const unsigned char *) token.c_str(), token_len,
|
||||
costs[token_index], costs[token_index], max_candidates, token_order, prefix_search, leaves);
|
||||
costs[token_index], costs[token_index], max_candidates, token_order, prefix_search,
|
||||
filter_ids, filter_ids_length, leaves);
|
||||
|
||||
if(!leaves.empty()) {
|
||||
token_cost_cache.emplace(token_cost_hash, leaves);
|
||||
|
@ -174,13 +174,13 @@ void sorted_array::indexOf(const uint32_t *values, const size_t values_len, uint
|
||||
do {
|
||||
head++;
|
||||
low_index = lower_bound_search_bits(in+METADATA_OVERHEAD, 0, length-1, base, bits, values[head], &actual_value);
|
||||
} while(actual_value != values[head]);
|
||||
} while(head < int(values_len - 1) && actual_value != values[head]);
|
||||
|
||||
int tail = values_len;
|
||||
do {
|
||||
tail--;
|
||||
high_index = lower_bound_search_bits(in+METADATA_OVERHEAD, 0, length-1, base, bits, values[tail], &actual_value);
|
||||
} while(actual_value != values[tail]);
|
||||
} while(tail > 0 && actual_value != values[tail]);
|
||||
|
||||
for(int i = 0; i < head; i++) {
|
||||
indices[i] = length;
|
||||
@ -237,4 +237,55 @@ void sorted_array::remove_values(uint32_t *sorted_values, uint32_t sorted_values
|
||||
load(new_array, new_index);
|
||||
delete[] curr_array;
|
||||
delete[] new_array;
|
||||
}
|
||||
}
|
||||
|
||||
size_t sorted_array::numFoundOf(const uint32_t *values, const size_t values_len) {
|
||||
size_t num_found = 0;
|
||||
|
||||
if(length == 0 || values_len == 0) {
|
||||
return num_found;
|
||||
}
|
||||
|
||||
uint32_t base = *(uint32_t *)(in + 0);
|
||||
uint32_t bits = *(in + 4);
|
||||
|
||||
uint32_t low_index, high_index;
|
||||
uint32_t actual_value = 0;
|
||||
|
||||
// identify the upper and lower bounds of the search space
|
||||
int head = -1;
|
||||
do {
|
||||
head++;
|
||||
low_index = lower_bound_search_bits(in+METADATA_OVERHEAD, 0, length-1, base, bits, values[head], &actual_value);
|
||||
} while(head < int(values_len - 1) && actual_value != values[head]);
|
||||
|
||||
int tail = values_len;
|
||||
do {
|
||||
tail--;
|
||||
high_index = lower_bound_search_bits(in+METADATA_OVERHEAD, 0, length-1, base, bits, values[tail], &actual_value);
|
||||
} while(tail > 0 && actual_value != values[tail]);
|
||||
|
||||
// recursively search within the bounds for all values
|
||||
binary_count_indices(values, head, tail, low_index, high_index, base, bits, num_found);
|
||||
|
||||
return num_found;
|
||||
}
|
||||
|
||||
void sorted_array::binary_count_indices(const uint32_t *values, int low_vindex, int high_vindex, int low_index,
|
||||
int high_index, uint32_t base, uint32_t bits, size_t& num_found) {
|
||||
|
||||
uint32_t actual_value = 0;
|
||||
|
||||
if(high_vindex >= low_vindex && high_index >= low_index) {
|
||||
size_t pivot_vindex = (low_vindex + high_vindex) / 2;
|
||||
|
||||
uint32_t in_index = lower_bound_search_bits(in+METADATA_OVERHEAD, low_index, high_index, base, bits,
|
||||
values[pivot_vindex], &actual_value);
|
||||
if(actual_value == values[pivot_vindex]) {
|
||||
num_found++;
|
||||
}
|
||||
|
||||
binary_count_indices(values, low_vindex, pivot_vindex-1, low_index, in_index-1, base, bits, num_found);
|
||||
binary_count_indices(values, pivot_vindex+1, high_vindex, in_index+1, high_index, base, bits, num_found);
|
||||
}
|
||||
}
|
||||
|
@ -588,22 +588,22 @@ TEST(ArtTest, test_art_fuzzy_search_single_leaf) {
|
||||
EXPECT_EQ(1, l->values->ids.at(0));
|
||||
|
||||
std::vector<art_leaf*> leaves;
|
||||
art_fuzzy_search(&t, (const unsigned char *) implement_key, strlen(implement_key) + 1, 0, 0, 10, FREQUENCY, false, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) implement_key, strlen(implement_key) + 1, 0, 0, 10, FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
|
||||
const char* implement_key_typo1 = "implment";
|
||||
const char* implement_key_typo2 = "implwnent";
|
||||
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) implement_key_typo1, strlen(implement_key_typo1) + 1, 0, 0, 10, FREQUENCY, false, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) implement_key_typo1, strlen(implement_key_typo1) + 1, 0, 0, 10, FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(0, leaves.size());
|
||||
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) implement_key_typo1, strlen(implement_key_typo1) + 1, 0, 1, 10, FREQUENCY, false, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) implement_key_typo1, strlen(implement_key_typo1) + 1, 0, 1, 10, FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) implement_key_typo2, strlen(implement_key_typo2) + 1, 0, 2, 10, FREQUENCY, false, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) implement_key_typo2, strlen(implement_key_typo2) + 1, 0, 2, 10, FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
|
||||
res = art_tree_destroy(&t);
|
||||
@ -623,7 +623,7 @@ TEST(ArtTest, test_art_fuzzy_search_single_leaf_prefix) {
|
||||
EXPECT_EQ(1, l->values->ids.at(0));
|
||||
|
||||
std::vector<art_leaf*> leaves;
|
||||
art_fuzzy_search(&t, (const unsigned char *) "aplication", strlen(key), 0, 1, 10, FREQUENCY, true, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "aplication", strlen(key), 0, 1, 10, FREQUENCY, true, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
|
||||
res = art_tree_destroy(&t);
|
||||
@ -651,48 +651,48 @@ TEST(ArtTest, test_art_fuzzy_search) {
|
||||
std::vector<art_leaf*> leaves;
|
||||
|
||||
// transpose
|
||||
art_fuzzy_search(&t, (const unsigned char *) "zymosthneic", strlen("zymosthneic") + 1, 0, 1, 10, FREQUENCY, false, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "zymosthneic", strlen("zymosthneic") + 1, 0, 1, 10, FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ("zymosthenic", (const char *)leaves.at(0)->key);
|
||||
|
||||
// transpose + missing
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) "dacrcyystlgia", strlen("dacrcyystlgia") + 1, 0, 2, 10, FREQUENCY, false, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "dacrcyystlgia", strlen("dacrcyystlgia") + 1, 0, 2, 10, FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ("dacrycystalgia", (const char *)leaves.at(0)->key);
|
||||
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) "dacrcyystlgia", strlen("dacrcyystlgia") + 1, 1, 2, 10, FREQUENCY, false, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "dacrcyystlgia", strlen("dacrcyystlgia") + 1, 1, 2, 10, FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ("dacrycystalgia", (const char *)leaves.at(0)->key);
|
||||
|
||||
// missing char
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) "gaberlunze", strlen("gaberlunze") + 1, 0, 1, 10, FREQUENCY, false, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "gaberlunze", strlen("gaberlunze") + 1, 0, 1, 10, FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ("gaberlunzie", (const char *)leaves.at(0)->key);
|
||||
|
||||
// extra char
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) "higghliving", strlen("higghliving") + 1, 0, 1, 10, FREQUENCY, false, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "higghliving", strlen("higghliving") + 1, 0, 1, 10, FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ("highliving", (const char *)leaves.at(0)->key);
|
||||
|
||||
// substituted char
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) "eacemiferous", strlen("eacemiferous") + 1, 0, 1, 10, FREQUENCY, false, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "eacemiferous", strlen("eacemiferous") + 1, 0, 1, 10, FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ("racemiferous", (const char *)leaves.at(0)->key);
|
||||
|
||||
// missing char + extra char
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) "Sarbruckken", strlen("Sarbruckken") + 1, 0, 2, 10, FREQUENCY, false, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "Sarbruckken", strlen("Sarbruckken") + 1, 0, 2, 10, FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ("Saarbrucken", (const char *)leaves.at(0)->key);
|
||||
|
||||
// multiple matching results
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) "hown", strlen("hown") + 1, 0, 1, 10, FREQUENCY, false, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "hown", strlen("hown") + 1, 0, 1, 10, FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(10, leaves.size());
|
||||
|
||||
std::vector<const char*> words = {"town", "sown", "shown", "own", "mown", "lown", "howl", "howk", "howe", "how"};
|
||||
@ -702,23 +702,23 @@ TEST(ArtTest, test_art_fuzzy_search) {
|
||||
|
||||
// fuzzy prefix search
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) "lionhear", strlen("lionhear"), 0, 0, 10, FREQUENCY, true, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "lionhear", strlen("lionhear"), 0, 0, 10, FREQUENCY, true, nullptr, 0, leaves);
|
||||
ASSERT_EQ(3, leaves.size());
|
||||
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) "lineage", strlen("lineage"), 0, 0, 10, FREQUENCY, true, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "lineage", strlen("lineage"), 0, 0, 10, FREQUENCY, true, nullptr, 0, leaves);
|
||||
ASSERT_EQ(2, leaves.size());
|
||||
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) "liq", strlen("liq"), 0, 0, 50, FREQUENCY, true, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "liq", strlen("liq"), 0, 0, 50, FREQUENCY, true, nullptr, 0, leaves);
|
||||
ASSERT_EQ(39, leaves.size());
|
||||
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) "antitraditian", strlen("antitraditian"), 0, 1, 10, FREQUENCY, true, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "antitraditian", strlen("antitraditian"), 0, 1, 10, FREQUENCY, true, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
|
||||
leaves.clear();
|
||||
art_fuzzy_search(&t, (const unsigned char *) "antisocao", strlen("antisocao"), 0, 2, 10, FREQUENCY, true, leaves);
|
||||
art_fuzzy_search(&t, (const unsigned char *) "antisocao", strlen("antisocao"), 0, 2, 10, FREQUENCY, true, nullptr, 0, leaves);
|
||||
ASSERT_EQ(10, leaves.size());
|
||||
|
||||
res = art_tree_destroy(&t);
|
||||
@ -744,7 +744,7 @@ TEST(ArtTest, test_art_fuzzy_search_unicode_chars) {
|
||||
EXPECT_EQ(1, l->values->ids.at(0));
|
||||
|
||||
std::vector<art_leaf*> leaves;
|
||||
art_fuzzy_search(&t, (unsigned char *)key, strlen(key), 0, 0, 10, FREQUENCY, true, leaves);
|
||||
art_fuzzy_search(&t, (unsigned char *)key, strlen(key), 0, 0, 10, FREQUENCY, true, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
}
|
||||
|
||||
@ -784,7 +784,7 @@ TEST(ArtTest, test_art_search_sku_like_tokens) {
|
||||
for (const auto &key : keys) {
|
||||
std::vector<art_leaf *> leaves;
|
||||
art_fuzzy_search(&t, (const unsigned char*)key.c_str(), key.size()+1, 0, 0, 10,
|
||||
FREQUENCY, true, leaves);
|
||||
FREQUENCY, true, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ(key.c_str(), (const char *) leaves.at(0)->key);
|
||||
|
||||
@ -792,7 +792,7 @@ TEST(ArtTest, test_art_search_sku_like_tokens) {
|
||||
|
||||
// non prefix
|
||||
art_fuzzy_search(&t, (const unsigned char*)key.c_str(), key.size()+1, 0, 0, 10,
|
||||
FREQUENCY, false, leaves);
|
||||
FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ(key.c_str(), (const char *) leaves.at(0)->key);
|
||||
}
|
||||
@ -830,7 +830,7 @@ TEST(ArtTest, test_art_search_ill_like_tokens) {
|
||||
|
||||
std::vector<art_leaf *> leaves;
|
||||
art_fuzzy_search(&t, (const unsigned char*)key.c_str(), key.size()+1, 0, 0, 10,
|
||||
FREQUENCY, true, leaves);
|
||||
FREQUENCY, true, nullptr, 0, leaves);
|
||||
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ(key.c_str(), (const char *) leaves.at(0)->key);
|
||||
@ -839,7 +839,7 @@ TEST(ArtTest, test_art_search_ill_like_tokens) {
|
||||
|
||||
// non prefix
|
||||
art_fuzzy_search(&t, (const unsigned char*)key.c_str(), key.size()+1, 0, 0, 10,
|
||||
FREQUENCY, false, leaves);
|
||||
FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ(key.c_str(), (const char *) leaves.at(0)->key);
|
||||
}
|
||||
@ -873,7 +873,7 @@ TEST(ArtTest, test_art_search_ill_like_tokens2) {
|
||||
|
||||
std::vector<art_leaf *> leaves;
|
||||
art_fuzzy_search(&t, (const unsigned char*)key.c_str(), key.size()+1, 0, 0, 10,
|
||||
FREQUENCY, true, leaves);
|
||||
FREQUENCY, true, nullptr, 0, leaves);
|
||||
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ(key.c_str(), (const char *) leaves.at(0)->key);
|
||||
@ -882,7 +882,7 @@ TEST(ArtTest, test_art_search_ill_like_tokens2) {
|
||||
|
||||
// non prefix
|
||||
art_fuzzy_search(&t, (const unsigned char*)key.c_str(), key.size()+1, 0, 0, 10,
|
||||
FREQUENCY, false, leaves);
|
||||
FREQUENCY, false, nullptr, 0, leaves);
|
||||
ASSERT_EQ(1, leaves.size());
|
||||
ASSERT_STREQ(key.c_str(), (const char *) leaves.at(0)->key);
|
||||
}
|
||||
|
@ -675,4 +675,49 @@ TEST_F(CollectionFilteringTest, ComparatorsOnMultiValuedNumericalField) {
|
||||
}
|
||||
|
||||
collectionManager.drop_collection("coll_array_fields");
|
||||
}
|
||||
|
||||
TEST_F(CollectionFilteringTest, FilteringWithPrefixSearch) {
|
||||
Collection *coll1;
|
||||
|
||||
std::vector<field> fields = {field("title", field_types::STRING, false),
|
||||
field("points", field_types::INT32, false),};
|
||||
|
||||
coll1 = collectionManager.get_collection("coll1");
|
||||
if(coll1 == nullptr) {
|
||||
coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
|
||||
}
|
||||
|
||||
std::vector<std::vector<std::string>> records = {
|
||||
{"elephant"}, {"emerald"}, {"effective"}, {"esther"}, {"eagle"},
|
||||
{"empty"}, {"elite"}, {"example"}, {"elated"}, {"end"},
|
||||
{"ear"}, {"eager"}, {"earmark"}, {"envelop"}, {"excess"},
|
||||
{"ember"}, {"earth"}, {"envoy"}, {"emerge"}, {"emigrant"},
|
||||
{"envision"}, {"envy"}, {"envisage"}, {"executive"}, {"end"},
|
||||
};
|
||||
|
||||
for(size_t i=0; i<records.size(); i++) {
|
||||
nlohmann::json doc;
|
||||
|
||||
doc["id"] = std::to_string(i);
|
||||
doc["title"] = records[i][0];
|
||||
doc["points"] = i;
|
||||
|
||||
ASSERT_TRUE(coll1->add(doc.dump()).ok());
|
||||
}
|
||||
|
||||
// pick a location close to only the Sacre Coeur
|
||||
auto res_op = coll1->search("e",
|
||||
{"title"}, "points: 23",
|
||||
{}, {}, 0, 10, 1, FREQUENCY, true);
|
||||
|
||||
auto results = res_op.get();
|
||||
LOG(INFO) << results;
|
||||
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
ASSERT_STREQ("23", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
@ -2,7 +2,6 @@
|
||||
#include "sorted_array.h"
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include "string_utils.h"
|
||||
|
||||
TEST(SortedArrayTest, Append) {
|
||||
sorted_array arr;
|
||||
@ -239,4 +238,21 @@ TEST(SortedArrayTest, BulkIndexOf) {
|
||||
auto search_id = search_ids.at(i);
|
||||
ASSERT_EQ(ids.indexOf(search_id), results[i]);
|
||||
}
|
||||
|
||||
// search with IDs that don't exist
|
||||
|
||||
search_ids = {100};
|
||||
delete [] results;
|
||||
results = new uint32_t[search_ids.size()];
|
||||
|
||||
ids.indexOf(&search_ids[0], search_ids.size(), results);
|
||||
ASSERT_EQ(562, results[0]);
|
||||
|
||||
search_ids = {100, 105};
|
||||
delete [] results;
|
||||
results = new uint32_t[search_ids.size()];
|
||||
|
||||
ids.indexOf(&search_ids[0], search_ids.size(), results);
|
||||
ASSERT_EQ(562, results[0]);
|
||||
ASSERT_EQ(562, results[1]);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user