mirror of
https://github.com/typesense/typesense.git
synced 2025-05-17 12:12:35 +08:00
Fix match contains edge case on overrides.
This commit is contained in:
parent
95c60db1a2
commit
87e8e7b0ce
@ -319,4 +319,6 @@ struct StringUtils {
|
||||
static std::string trim_curly_spaces(const std::string& str);
|
||||
|
||||
static bool ends_with(std::string const &str, std::string const &ending);
|
||||
|
||||
static bool contains_word(const std::string& haystack, const std::string& needle);
|
||||
};
|
@ -431,8 +431,9 @@ void Collection::curate_results(string& actual_query, bool enable_overrides, boo
|
||||
continue;
|
||||
}
|
||||
|
||||
if( (override.rule.match == override_t::MATCH_EXACT && override.rule.query == query) ||
|
||||
(override.rule.match == override_t::MATCH_CONTAINS && query.find(override.rule.query) != std::string::npos) ) {
|
||||
if ((override.rule.match == override_t::MATCH_EXACT && override.rule.query == query) ||
|
||||
(override.rule.match == override_t::MATCH_CONTAINS &&
|
||||
StringUtils::contains_word(query, override.rule.query))) {
|
||||
|
||||
// have to ensure that dropped hits take precedence over added hits
|
||||
for(const auto & hit: override.drop_hits) {
|
||||
|
@ -1153,12 +1153,12 @@ void Index::search_candidates(const uint8_t & field_id, bool field_is_array,
|
||||
//LOG(INFO) << "field_num_results: " << field_num_results << ", typo_tokens_threshold: " << typo_tokens_threshold;
|
||||
//LOG(INFO) << "n: " << n;
|
||||
|
||||
/*std::stringstream fullq;
|
||||
std::stringstream fullq;
|
||||
for(const auto& qleaf : actual_query_suggestion) {
|
||||
std::string qtok(reinterpret_cast<char*>(qleaf->key),qleaf->key_len - 1);
|
||||
fullq << qtok << " ";
|
||||
}
|
||||
LOG(INFO) << "field: " << size_t(field_id) << ", query: " << fullq.str() << ", total_cost: " << total_cost;*/
|
||||
LOG(INFO) << "field: " << size_t(field_id) << ", query: " << fullq.str() << ", total_cost: " << total_cost;
|
||||
|
||||
// Prepare excluded document IDs that we can later remove from the result set
|
||||
uint32_t* excluded_result_ids = nullptr;
|
||||
@ -1735,8 +1735,9 @@ bool Index::static_filter_query_eval(const override_t* override,
|
||||
|
||||
std::string query = StringUtils::join(tokens, " ");
|
||||
|
||||
if( (override->rule.match == override_t::MATCH_EXACT && override->rule.query == query) ||
|
||||
(override->rule.match == override_t::MATCH_CONTAINS && query.find(override->rule.query) != std::string::npos) ) {
|
||||
if ((override->rule.match == override_t::MATCH_EXACT && override->rule.query == query) ||
|
||||
(override->rule.match == override_t::MATCH_CONTAINS &&
|
||||
StringUtils::contains_word(query, override->rule.query))) {
|
||||
|
||||
Option<bool> filter_op = filter::parse_filter_query(override->filter_by, search_schema,
|
||||
store, "", filters);
|
||||
|
@ -291,6 +291,28 @@ bool StringUtils::ends_with(const std::string& str, const std::string& ending) {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if `needle` appears in `haystack` as a whole, space-delimited
// word (or run of words), i.e. the match is bounded by string edges or spaces.
//
// Bug fix: the previous implementation only inspected the FIRST occurrence of
// `needle`. If that occurrence failed the word-boundary check, it returned
// false even when a later occurrence was a standalone word, e.g.
// contains_word("foobar foo", "foo") wrongly returned false. We now scan
// every occurrence until one satisfies both boundary conditions.
bool StringUtils::contains_word(const std::string& haystack, const std::string& needle) {
    size_t pos = haystack.find(needle);

    while(pos != std::string::npos) {
        // Left boundary: match starts the string or follows a space.
        bool starts_on_boundary = (pos == 0) || (haystack[pos - 1] == ' ');

        // Right boundary: match ends the string or is followed by a space.
        size_t end_pos = pos + needle.size();
        bool ends_on_boundary = (end_pos == haystack.size()) || (haystack[end_pos] == ' ');

        if(starts_on_boundary && ends_on_boundary) {
            return true;
        }

        // This occurrence is embedded in a larger token; try the next one.
        pos = haystack.find(needle, pos + 1);
    }

    return false;
}
|
||||
|
||||
/*size_t StringUtils::unicode_length(const std::string& bytes) {
|
||||
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> utf8conv;
|
||||
return utf8conv.from_bytes(bytes).size();
|
||||
|
@ -171,6 +171,14 @@ TEST_F(CollectionOverrideTest, ExcludeIncludeExactQueryMatch) {
|
||||
ASSERT_STREQ("2", results["hits"][2]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("1", results["hits"][3]["document"]["id"].get<std::string>().c_str());
|
||||
|
||||
// partial word should not match
|
||||
res_op = coll_mul_fields->search("dowillow", {"title"}, "", {}, {}, {0}, 10);
|
||||
ASSERT_TRUE(res_op.ok());
|
||||
results = res_op.get();
|
||||
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
ASSERT_EQ(0, results["found"].get<uint32_t>());
|
||||
|
||||
// ability to disable overrides
|
||||
bool enable_overrides = false;
|
||||
res_op = coll_mul_fields->search("will", {"title"}, "", {}, {}, {0}, 10,
|
||||
@ -1531,6 +1539,13 @@ TEST_F(CollectionOverrideTest, StaticFiltering) {
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
|
||||
// partial word should not match
|
||||
results = coll1->search("inexpensive shoes", {"name"}, "",
|
||||
{}, sort_fields, {2}, 10, 1, FREQUENCY, {true}, 10).get();
|
||||
|
||||
ASSERT_EQ(2, results["found"].get<uint32_t>());
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
// with synonum for expensive
|
||||
synonym_t synonym1{"costly-expensive", {"costly"}, {{"expensive"}} };
|
||||
coll1->add_synonym(synonym1);
|
||||
|
@ -282,3 +282,17 @@ TEST(StringUtilsTest, ShouldTrimCurlySpaces) {
|
||||
ASSERT_EQ("{}", StringUtils::trim_curly_spaces("{ }"));
|
||||
ASSERT_EQ("foo {bar} {baz}", StringUtils::trim_curly_spaces("foo { bar } { baz}"));
|
||||
}
|
||||
|
||||
TEST(StringUtilsTest, ContainsWord) {
    // Whole-word matches: needle bounded by string edges or spaces.
    ASSERT_TRUE(StringUtils::contains_word("foo bar", "foo"));
    ASSERT_TRUE(StringUtils::contains_word("foo bar", "bar"));
    ASSERT_TRUE(StringUtils::contains_word("foo bar baz", "bar"));
    ASSERT_TRUE(StringUtils::contains_word("foo bar baz", "foo bar"));
    ASSERT_TRUE(StringUtils::contains_word("foo bar baz", "bar baz"));

    // Embedded substrings must NOT count as word matches.
    ASSERT_FALSE(StringUtils::contains_word("foobar", "bar"));
    ASSERT_FALSE(StringUtils::contains_word("foobar", "foo"));
    ASSERT_FALSE(StringUtils::contains_word("foobar baz", "bar"));
    ASSERT_FALSE(StringUtils::contains_word("foobar baz", "bar baz"));
    ASSERT_FALSE(StringUtils::contains_word("baz foobar", "foo"));
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user