Respect drop_tokens_threshold even when a query token does not exist in the index.

This commit is contained in:
Kishore Nallan 2018-04-13 06:52:16 +05:00
parent 874b5beb89
commit b186816ca5
3 changed files with 13 additions and 4 deletions

View File

@ -905,11 +905,11 @@ int art_topk_iter(const art_node *root, token_ordering token_order, size_t max_r
std::vector<art_leaf *> &results) {
printf("INSIDE art_topk_iter: root->type: %d\n", root->type);
std::priority_queue<art_node *, std::vector<const art_node *>,
std::priority_queue<const art_node *, std::vector<const art_node *>,
decltype(&compare_art_node_score_pq)> q(compare_art_node_score_pq);
if(token_order == FREQUENCY) {
q = std::priority_queue<art_node *, std::vector<const art_node *>,
q = std::priority_queue<const art_node *, std::vector<const art_node *>,
decltype(&compare_art_node_frequency_pq)>(compare_art_node_frequency_pq);
}

View File

@ -331,7 +331,7 @@ void Index::search_candidates(uint32_t* filter_ids, size_t filter_ids_length, st
// every element in `query_suggestion` contains a token and its associated hits
std::vector<art_leaf *> query_suggestion = next_suggestion(token_candidates_vec, n);
/*for(auto i=0; i < query_suggestion.size(); i++) {
/*for(size_t i=0; i < query_suggestion.size(); i++) {
LOG(INFO) << "i: " << i << " - " << query_suggestion[i]->key;
}*/
@ -715,7 +715,12 @@ void Index::search_field(std::string & query, const std::string & field, uint32_
if(it != token_to_costs[token_index].end()) {
token_to_costs[token_index].erase(it);
// no more costs left for this token, clean up
// when no more costs are left for this token and `drop_tokens_threshold` is breached
if(token_to_costs[token_index].empty() && topster.size >= drop_tokens_threshold) {
break;
}
// otherwise, we try to drop the token and search with remaining tokens
if(token_to_costs[token_index].empty()) {
token_to_costs.erase(token_to_costs.begin()+token_index);
tokens.erase(tokens.begin()+token_index);

View File

@ -223,6 +223,10 @@ TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
results.clear();
results = collection->search("the a DoesNotExist", query_fields, "", facets, sort_fields, 0, 10, 1, FREQUENCY, false, 0).get();
ASSERT_EQ(0, results["hits"].size());
// with no indexed word
results.clear();
results = collection->search("DoesNotExist1 DoesNotExist2", query_fields, "", facets, sort_fields, 0, 10).get();