Enable num_typos for the Thai (`th`) locale.

This commit is contained in:
Kishore Nallan 2022-08-24 12:45:34 +05:30
parent 936270b4a5
commit 63fbf807a9
2 changed files with 20 additions and 1 deletions

View File

@ -2902,7 +2902,7 @@ void Index::fuzzy_search_fields(const std::vector<search_field_t>& the_fields,
int64_t field_num_typos = (field_id < num_typos.size()) ? num_typos[the_field.orig_index] : num_typos[0];
auto& locale = search_schema.at(the_field.name).locale;
if(locale != "" && locale != "en" && !Tokenizer::is_cyrillic(locale)) {
if(locale != "" && locale != "en" && locale != "th" && !Tokenizer::is_cyrillic(locale)) {
// disable fuzzy trie traversal for locales other than English, Thai and Cyrillic ones
field_num_typos = 0;
}

View File

@ -278,6 +278,7 @@ TEST_F(CollectionLocaleTest, SearchThaiTextPreSegmentedQuery) {
{"ความเหลื่อมล้ำ", "Compound Word"}, // ความ, เหลื่อม, ล้ำ
{"การกระจายรายได้", "Doc A"},
{"จารีย์", "Doc B"},
{"Meiji", "Doc C"},
};
for(size_t i=0; i<records.size(); i++) {
@ -299,6 +300,24 @@ TEST_F(CollectionLocaleTest, SearchThaiTextPreSegmentedQuery) {
ASSERT_EQ(1, results["found"].get<size_t>());
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
results = coll1->search("meji",
{"title"}, "", {}, {}, {2}, 10, 1, FREQUENCY, {true},
10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
"<mark>", "</mark>", {1}, 1000, true, true).get();
ASSERT_EQ(1, results["found"].get<size_t>());
ASSERT_EQ("3", results["hits"][0]["document"]["id"].get<std::string>());
results = coll1->search("ควม",
{"title"}, "", {}, {}, {2}, 10, 1, FREQUENCY, {true},
10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
"<mark>", "</mark>", {1}, 1000, true, true).get();
ASSERT_EQ(1, results["found"].get<size_t>());
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
}
TEST_F(CollectionLocaleTest, SearchAgainstThaiTextExactMatch) {