diff --git a/include/index.h b/include/index.h index ef05e6c4..c08479a4 100644 --- a/include/index.h +++ b/include/index.h @@ -292,7 +292,7 @@ private: public: // for limiting number of results on multiple candidates / query rewrites - enum {TYPO_TOKENS_THRESHOLD = 100}; + enum {TYPO_TOKENS_THRESHOLD = 1}; // for limiting number of fields that can be searched on enum {FIELD_LIMIT_NUM = 100}; @@ -301,7 +301,7 @@ public: // If the number of results found is less than this threshold, Typesense will attempt to drop the tokens // in the query that have the least individual hits one by one until enough results are found. - static const int DROP_TOKENS_THRESHOLD = 10; + static const int DROP_TOKENS_THRESHOLD = 1; Index() = delete; diff --git a/src/index.cpp b/src/index.cpp index 0ad07385..ed9a4327 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -2025,7 +2025,7 @@ void Index::search_field(const uint8_t & field_id, num_tokens_dropped, field, filter_ids, filter_ids_length, curated_ids,facets, sort_fields, num_typos,searched_queries, topster, groups_processed, all_result_ids, all_result_ids_len, field_num_results, group_limit, group_by_fields, prioritize_exact_match, - token_order, prefix, combination_limit); + token_order, prefix, drop_tokens_threshold, typo_tokens_threshold, combination_limit); } } diff --git a/test/collection_grouping_test.cpp b/test/collection_grouping_test.cpp index 38ea1cda..9c71b27e 100644 --- a/test/collection_grouping_test.cpp +++ b/test/collection_grouping_test.cpp @@ -287,7 +287,7 @@ TEST_F(CollectionGroupingTest, GroupingWithMultiFieldRelevance) { auto results = coll1->search("Dustin Kensrue Down There by the Train", {"title", "artist"}, "", {}, {}, {0}, 10, 1, FREQUENCY, - {false}, Index::DROP_TOKENS_THRESHOLD, + {false}, 10, spp::sparse_hash_set(), spp::sparse_hash_set(), 10, "", 30, 5, "", 10, diff --git a/test/collection_locale_test.cpp b/test/collection_locale_test.cpp index 75fe60b1..cf1d8a33 100644 --- a/test/collection_locale_test.cpp +++ b/test/collection_locale_test.cpp @@ -349,7 +349,10 @@ TEST_F(CollectionLocaleTest, KoreanTextPrefixConsonant) { // To ensure that NFKD works, we will test for both ᄀ (Hangul Choseong Kiyeok) auto results = coll1->search("서울특별시 ᄀ", - {"title"}, "", {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}).get(); + {"title"}, "", {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(6, results["found"].get()); ASSERT_EQ(6, results["hits"].size()); @@ -357,7 +360,10 @@ TEST_F(CollectionLocaleTest, KoreanTextPrefixConsonant) { // and ㄱ (Hangul Letter Kiyeok) results = coll1->search("서울특별시 ㄱ", - {"title"}, "", {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}).get(); + {"title"}, "", {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(6, results["found"].get()); ASSERT_EQ(6, results["hits"].size()); @@ -365,7 +371,10 @@ TEST_F(CollectionLocaleTest, KoreanTextPrefixConsonant) { // search for full word results = coll1->search("서울특별시 관", - {"title"}, "", {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}).get(); + {"title"}, "", {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(6, results["found"].get()); ASSERT_EQ(6, results["hits"].size()); @@ -407,7 +416,10 @@ TEST_F(CollectionLocaleTest, KoreanTextPrefixVowel) { std::vector sort_fields = { sort_by(sort_field_const::text_match, "DESC"), sort_by("points", "DESC") }; auto results = coll1->search("서울특별시 고", - {"title"}, "", {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}).get(); + {"title"}, "", {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(6, results["found"].get()); ASSERT_EQ(6, results["hits"].size()); diff --git a/test/collection_sorting_test.cpp b/test/collection_sorting_test.cpp index 5c2283b0..8e088361 100644 --- a/test/collection_sorting_test.cpp +++ b/test/collection_sorting_test.cpp @@ -438,7 +438,10 @@ TEST_F(CollectionSortingTest, ThreeSortFieldsTextMatchLast) { std::vector sort_fields = { sort_by("popularity", "DESC"), sort_by("points", "DESC"), sort_by(sort_field_const::text_match, "DESC") }; auto res = coll1->search("grant", - {"title","artist"}, "", {}, sort_fields, {1}, 10, 1, FREQUENCY).get(); + {"title","artist"}, "", {}, sort_fields, {1}, 10, 1, FREQUENCY, {false}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(2, res["found"].get()); ASSERT_STREQ("1", res["hits"][0]["document"]["id"].get().c_str()); @@ -479,7 +482,10 @@ TEST_F(CollectionSortingTest, SingleFieldTextMatchScoreDefault) { auto results = coll1->search("alpha", {"title"}, "", {}, sort_fields, {2}, 10, 1, FREQUENCY, - {false}, 10).get(); + {false}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(3, results["found"].get()); ASSERT_EQ(3, results["hits"].size()); diff --git a/test/collection_specific_test.cpp b/test/collection_specific_test.cpp index 7b9d302c..d7d650b0 100644 --- a/test/collection_specific_test.cpp +++ b/test/collection_specific_test.cpp @@ -410,7 +410,10 @@ TEST_F(CollectionSpecificTest, PrefixVsExactMatch) { } auto results = coll1->search("ration", - {"title"}, "", {}, {}, {1}, 10, 1, FREQUENCY, {true}).get(); + {"title"}, "", {}, {}, {1}, 10, 1, FREQUENCY, {true}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(4, results["found"].get()); ASSERT_EQ(4, results["hits"].size()); @@ -818,7 +821,7 @@ TEST_F(CollectionSpecificTest, PrefixSearchOnlyOnLastToken) { "", {}, {}, {1}, 10, 1, FREQUENCY, {true}, 0, spp::sparse_hash_set(), - spp::sparse_hash_set(), 10, "", 30, 4, "description", 20, {}, {}, {}, 0, + spp::sparse_hash_set(), 10, "", 30, 4, "concat", 20, {}, {}, {}, 0, "", "").get(); ASSERT_EQ(0, results["hits"][0]["highlights"].size()); diff --git a/test/collection_test.cpp b/test/collection_test.cpp index 465de79c..187845df 100644 --- a/test/collection_test.cpp +++ b/test/collection_test.cpp @@ -151,7 +151,12 @@ TEST_F(CollectionTest, ExactSearchShouldBeStable) { TEST_F(CollectionTest, PhraseSearch) { std::vector facets; - nlohmann::json results = collection->search("rocket launch", query_fields, "", facets, sort_fields, {0}, 10).get(); + nlohmann::json results = collection->search("rocket launch", query_fields, "", facets, sort_fields, {0}, 10, + 1, FREQUENCY, + {false}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(5, results["hits"].size()); ASSERT_EQ(5, results["found"].get()); @@ -180,7 +185,12 @@ TEST_F(CollectionTest, PhraseSearch) { // Check ASC sort order std::vector sort_fields_asc = { sort_by(sort_field_const::text_match, "DESC"), sort_by("points", "ASC") }; - results = collection->search("rocket launch", query_fields, "", facets, sort_fields_asc, {0}, 10).get(); + results = collection->search("rocket launch", query_fields, "", facets, sort_fields_asc, {0}, 10, + 1, FREQUENCY, + {false}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(5, results["hits"].size()); ASSERT_EQ(5, results["found"].get()); @@ -194,7 +204,12 @@ TEST_F(CollectionTest, PhraseSearch) { } // Check pagination - results = collection->search("rocket launch", query_fields, "", facets, sort_fields, {0}, 3).get(); + results = collection->search("rocket launch", query_fields, "", facets, sort_fields, {0}, 3, + 1, FREQUENCY, + {false}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(3, results["hits"].size()); ASSERT_EQ(5, results["found"].get()); @@ -212,7 +227,12 @@ TEST_F(CollectionTest, PhraseSearch) { TEST_F(CollectionTest, SearchWithExcludedTokens) { std::vector facets; - nlohmann::json results = collection->search("how -propellants -are", query_fields, "", facets, sort_fields, {0}, 10).get(); + nlohmann::json results = collection->search("how -propellants -are", query_fields, "", facets, sort_fields, {0}, 10, + 1, FREQUENCY, + {false}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(2, results["hits"].size()); ASSERT_EQ(2, results["found"].get()); @@ -276,7 +296,10 @@ TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) { // should not try to drop tokens to expand query results.clear(); - results = collection->search("the a", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}, 10).get(); + results = collection->search("the a", query_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(9, results["hits"].size()); results.clear(); @@ -322,7 +345,12 @@ TEST_F(CollectionTest, PartialPhraseSearch) { TEST_F(CollectionTest, QueryWithTypo) { std::vector facets; - nlohmann::json results = collection->search("kind biologcal", query_fields, "", facets, sort_fields, {2}, 3).get(); + nlohmann::json results = collection->search("kind biologcal", query_fields, "", facets, sort_fields, {2}, 3, + 1, FREQUENCY, + {false}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(3, results["hits"].size()); std::vector ids = {"19", "3", "20"}; @@ -335,7 +363,12 @@ TEST_F(CollectionTest, QueryWithTypo) { } results.clear(); - results = collection->search("fer thx", query_fields, "", facets, sort_fields, {1}, 3).get(); + results = collection->search("fer thx", query_fields, "", facets, sort_fields, {1}, 3, + 1, FREQUENCY, + {false}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ids = {"1", "10", "13"}; ASSERT_EQ(3, results["hits"].size()); @@ -411,7 +444,9 @@ TEST_F(CollectionTest, TypoTokenRankedByScoreAndFrequency) { TEST_F(CollectionTest, TextContainingAnActualTypo) { // A line contains "ISX" but not "what" - need to ensure that correction to "ISS what" happens std::vector facets; - nlohmann::json results = collection->search("ISX what", query_fields, "", facets, sort_fields, {1}, 4, 1, FREQUENCY, {false}).get(); + nlohmann::json results = collection->search("ISX what", query_fields, "", facets, sort_fields, {1}, 4, 1, FREQUENCY, {false}, + 10, spp::sparse_hash_set(), spp::sparse_hash_set(), + 10, "", 30, 5, "", 10).get(); ASSERT_EQ(4, results["hits"].size()); ASSERT_EQ(13, results["found"].get()); @@ -425,7 +460,10 @@ TEST_F(CollectionTest, TextContainingAnActualTypo) { } // Record containing exact token match should appear first - results = collection->search("ISX", query_fields, "", facets, sort_fields, {1}, 10, 1, FREQUENCY, {false}).get(); + results = collection->search("ISX", query_fields, "", facets, sort_fields, {1}, 10, 1, FREQUENCY, {false}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(8, results["hits"].size()); ASSERT_EQ(8, results["found"].get()); @@ -547,7 +585,10 @@ TEST_F(CollectionTest, PrefixSearching) { ASSERT_STREQ(id.c_str(), result_id.c_str()); } - results = collection->search("what ex", query_fields, "", facets, sort_fields, {0}, 10, 1, MAX_SCORE, {true}).get(); + results = collection->search("what ex", query_fields, "", facets, sort_fields, {0}, 10, 1, MAX_SCORE, {true}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(9, results["hits"].size()); ids = {"6", "12", "19", "22", "13", "8", "15", "24", "21"}; @@ -559,7 +600,10 @@ TEST_F(CollectionTest, PrefixSearching) { } // restrict to only 2 results and differentiate between MAX_SCORE and FREQUENCY - results = collection->search("t", query_fields, "", facets, sort_fields, {0}, 2, 1, MAX_SCORE, {true}).get(); + results = collection->search("t", query_fields, "", facets, sort_fields, {0}, 2, 1, MAX_SCORE, {true}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(2, results["hits"].size()); ids = {"19", "22"}; @@ -570,7 +614,10 @@ TEST_F(CollectionTest, PrefixSearching) { ASSERT_STREQ(id.c_str(), result_id.c_str()); } - results = collection->search("t", query_fields, "", facets, sort_fields, {0}, 2, 1, FREQUENCY, {true}).get(); + results = collection->search("t", query_fields, "", facets, sort_fields, {0}, 2, 1, FREQUENCY, {true}, 10, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10).get(); ASSERT_EQ(2, results["hits"].size()); ids = {"19", "22"};