From 0ae718d067a4ec94ffba592824695234621f563e Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Fri, 16 Jul 2021 12:07:44 +0530 Subject: [PATCH] Use all candidates of a given num_typo value. Typo tokens threshold should not trigger when we have explored only some of the candidates of a given num_typo value. --- src/index.cpp | 14 ++++++++----- test/collection_specific_test.cpp | 33 +++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/index.cpp b/src/index.cpp index 38ee7555..627729cc 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -910,6 +910,8 @@ void Index::search_candidates(const uint8_t & field_id, auto product = []( long long a, token_candidates & b ) { return a*b.candidates.size(); }; long long int N = std::accumulate(token_candidates_vec.begin(), token_candidates_vec.end(), 1LL, product); + size_t last_cost = 0; + for(long long n=0; n query_suggestion(token_candidates_vec.size()); @@ -921,6 +923,13 @@ void Index::search_candidates(const uint8_t & field_id, uint32_t total_cost = next_suggestion(token_candidates_vec, n, actual_query_suggestion, query_suggestion, token_bits); + //LOG(INFO) << "field_num_results: " << field_num_results << ", typo_tokens_threshold: " << typo_tokens_threshold; + if(total_cost != last_cost && field_num_results >= typo_tokens_threshold) { + //break; + } + + last_cost = total_cost; + /*LOG(INFO) << "n: " << n; for(size_t i=0; i < actual_query_suggestion.size(); i++) { LOG(INFO) << "i: " << i << " - " << actual_query_suggestion[i]->key << ", ids: " @@ -1019,11 +1028,6 @@ void Index::search_candidates(const uint8_t & field_id, } searched_queries.push_back(actual_query_suggestion); - - //LOG(INFO) << "field_num_results: " << field_num_results << ", typo_tokens_threshold: " << typo_tokens_threshold; - if(field_num_results >= typo_tokens_threshold) { - break; - } } } diff --git a/test/collection_specific_test.cpp b/test/collection_specific_test.cpp index 52d5979b..ca9163c5 100644 --- 
a/test/collection_specific_test.cpp
+++ b/test/collection_specific_test.cpp
@@ -276,6 +276,39 @@ TEST_F(CollectionSpecificTest, MultiFieldArrayRepeatingTokens) {
     collectionManager.drop_collection("coll1");
 }
 
+TEST_F(CollectionSpecificTest, ExactMatchOnPrefix) {
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
+
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["title"] = "Yeshivah Gedolah High School";
+    doc1["points"] = 100;
+
+    nlohmann::json doc2;
+    doc2["id"] = "1";
+    doc2["title"] = "GED";
+    doc2["points"] = 50;
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
+
+    auto results = coll1->search("ged", {"title"}, "", {}, {}, {2}, 10,
+                                 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                                 spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
+                                 "", 1).get();
+
+    ASSERT_EQ(2, results["hits"].size());
+
+    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
+    ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
+
+    collectionManager.drop_collection("coll1");
+}
+
 TEST_F(CollectionSpecificTest, PrefixWithTypos) {
     std::vector<field> fields = {field("title", field_types::STRING, false),
                                  field("points", field_types::INT32, false),};