Exhaustive search should not always be enabled during token drop search.

Kishore Nallan authored on 2021-09-18 15:55:16 +05:30
parent 6621280ffc
commit 27b392cee9
2 changed files with 8 additions and 6 deletions
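
Context for the change below: inside Index::search_field, the token-drop retry recurses back into search_field, and the last argument of that recursive call was combination_limit rather than the caller's exhaustive_search flag. Since a numeric limit implicitly converts to true in a bool parameter slot, every dropped-token retry appears to have run as an exhaustive search; the fix forwards exhaustive_search unchanged. A minimal, self-contained sketch of that pitfall and the fix, using hypothetical simplified signatures rather than the actual Typesense internals:

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical, heavily simplified stand-in for Index::search_field,
    // only to illustrate the bug class: a size_t accidentally passed into a
    // bool parameter slot silently converts to `true`.
    void search_field(const std::vector<std::string>& tokens,
                      size_t drop_tokens_threshold,
                      size_t typo_tokens_threshold,
                      bool exhaustive_search) {
        std::cout << "searching " << tokens.size() << " token(s), exhaustive="
                  << std::boolalpha << exhaustive_search << "\n";

        // Pretend the full query produced too few results, so we drop the
        // last token and retry, as the real token-drop logic does.
        if (tokens.size() > 1) {
            std::vector<std::string> truncated(tokens.begin(), tokens.end() - 1);

            size_t combination_limit = 10000;  // unrelated tuning knob

            // BUG (pre-fix): combination_limit lands in the bool slot, so the
            // retry is always exhaustive.
            // search_field(truncated, drop_tokens_threshold,
            //              typo_tokens_threshold, combination_limit);
            (void)combination_limit;

            // FIX: forward the caller's flag unchanged.
            search_field(truncated, drop_tokens_threshold,
                         typo_tokens_threshold, exhaustive_search);
        }
    }

    int main() {
        search_field({"rocket", "research"}, /*drop_tokens_threshold=*/10,
                     /*typo_tokens_threshold=*/100, /*exhaustive_search=*/false);
    }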


@@ -2269,6 +2269,7 @@ void Index::search_field(const uint8_t & field_id,
 costs[i] = token_to_costs[i][q.rem];
 }
+unique_tokens.clear();
 token_candidates_vec.clear();
 size_t token_index = 0;
@@ -2388,7 +2389,7 @@ void Index::search_field(const uint8_t & field_id,
 sort_fields, num_typos,searched_queries, topster, groups_processed, all_result_ids,
 all_result_ids_len, field_num_results, group_limit, group_by_fields,
 prioritize_exact_match, concurrency,
-token_order, prefix, drop_tokens_threshold, typo_tokens_threshold, combination_limit);
+token_order, prefix, drop_tokens_threshold, typo_tokens_threshold, exhaustive_search);
 }
 }


@@ -317,7 +317,7 @@ TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
 spp::sparse_hash_set<std::string>(),
 spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
 "", 10).get();
-ASSERT_EQ(9, results["hits"].size());
+ASSERT_EQ(7, results["hits"].size());
 results.clear();
 results = collection->search("the a", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}, 0).get();
@@ -347,10 +347,11 @@ TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
 TEST_F(CollectionTest, PartialPhraseSearch) {
 std::vector<std::string> facets;
-nlohmann::json results = collection->search("rocket research", query_fields, "", facets, sort_fields, {0}, 10).get();
-ASSERT_EQ(6, results["hits"].size());
+nlohmann::json results = collection->search("rocket research", query_fields, "", facets,
+    sort_fields, {0}, 10, 1, FREQUENCY, {false}, 10).get();
+ASSERT_EQ(4, results["hits"].size());
-std::vector<std::string> ids = {"19", "1", "10", "8", "16", "17"};
+std::vector<std::string> ids = {"1", "8", "16", "17"};
 for(size_t i = 0; i < results["hits"].size(); i++) {
 nlohmann::json result = results["hits"].at(i);
@@ -648,7 +649,7 @@ TEST_F(CollectionTest, PrefixSearching) {
 }
 // only the last token in the query should be used for prefix search - so, "math" should not match "mathematics"
-results = collection->search("math fx", query_fields, "", facets, sort_fields, {0}, 1, 1, FREQUENCY, {true}).get();
+results = collection->search("math fx", query_fields, "", facets, sort_fields, {0}, 1, 1, FREQUENCY, {true}, 0).get();
 ASSERT_EQ(0, results["hits"].size());
 // single and double char prefixes should set a ceiling on the num_typos possible
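
The comment in the last hunk states that only the final token of the query is treated as a prefix, so "math" in "math fx" has to match a whole word and cannot match "mathematics". A small, self-contained sketch of that matching rule; the helper names (token_matches, query_matches) are hypothetical and not Typesense APIs:

    #include <iostream>
    #include <string>
    #include <vector>

    // Exact match for non-final tokens; prefix match only when allowed.
    bool token_matches(const std::string& query_token,
                       const std::string& doc_token, bool as_prefix) {
        if (as_prefix) {
            return doc_token.compare(0, query_token.size(), query_token) == 0;
        }
        return doc_token == query_token;
    }

    // Every query token must be found in the document; only the last query
    // token is allowed to match as a prefix when prefix search is enabled.
    bool query_matches(const std::vector<std::string>& query_tokens,
                       const std::vector<std::string>& doc_tokens, bool prefix) {
        for (size_t i = 0; i < query_tokens.size(); i++) {
            bool is_last = (i + 1 == query_tokens.size());
            bool found = false;
            for (const auto& dt : doc_tokens) {
                if (token_matches(query_tokens[i], dt, prefix && is_last)) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                return false;
            }
        }
        return true;
    }

    int main() {
        // "math" is not the last token, so it must match exactly and does not
        // match "mathematics"; hence the test above expects zero hits.
        std::cout << std::boolalpha
                  << query_matches({"math", "fx"}, {"mathematics", "fx"}, true)
                  << "\n";  // prints: false
    }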