mirror of
https://github.com/typesense/typesense.git
synced 2025-05-22 06:40:30 +08:00
Fix exact filtering edge cases.
This commit is contained in:
parent
f37e8e9928
commit
c33a8fad19
@ -1381,7 +1381,7 @@ void Index::do_filtering(uint32_t*& filter_ids, uint32_t& filter_ids_length,
|
||||
}
|
||||
|
||||
// For NOT_EQUALS alone, it is okay for none of the results to match prior to negation
|
||||
// e.g. field:- [RANDOM_NON_EXISTING_STRING]
|
||||
// e.g. field:!= [RANDOM_NON_EXISTING_STRING]
|
||||
if(a_filter.comparators[0] != NOT_EQUALS && posting_lists.size() != str_tokens.size()) {
|
||||
continue;
|
||||
}
|
||||
|
@ -1019,7 +1019,8 @@ void posting_list_t::get_exact_matches(std::vector<iterator_t>& its, const bool
|
||||
|
||||
if(j == its.size()-1) {
|
||||
// check if the last query token is the last offset
|
||||
if(offsets[end_offset_index-1] != 0) {
|
||||
if( offsets[end_offset_index-1] != 0 ||
|
||||
(end_offset_index-2 >= 0 && offsets[end_offset_index-2] != its.size())) {
|
||||
// not the last token for the document, so skip
|
||||
is_exact_match = false;
|
||||
break;
|
||||
@ -1029,6 +1030,7 @@ void posting_list_t::get_exact_matches(std::vector<iterator_t>& its, const bool
|
||||
// looping handles duplicate query tokens, e.g. "hip hip hurray hurray"
|
||||
while(start_offset_index < end_offset_index) {
|
||||
uint32_t offset = offsets[start_offset_index];
|
||||
start_offset_index++;
|
||||
|
||||
if(offset == (j + 1)) {
|
||||
// we have found a matching index, no need to look further
|
||||
@ -1094,7 +1096,7 @@ void posting_list_t::get_exact_matches(std::vector<iterator_t>& its, const bool
|
||||
|
||||
if(start_offset_index+1 < end_offset_index) {
|
||||
size_t next_offset = (size_t) offsets[start_offset_index + 1];
|
||||
if(next_offset == 0) {
|
||||
if(next_offset == 0 && pos == its.size()) {
|
||||
// indicates that token is the last token on the doc
|
||||
has_atleast_one_last_token = true;
|
||||
start_offset_index++;
|
||||
|
@ -1961,4 +1961,102 @@ TEST_F(CollectionFilteringTest, FilteringWithTokenSeparators) {
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
collectionManager.drop_collection("coll2");
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(CollectionFilteringTest, ExactFilteringRepeatingTokensSingularField) {
    // Exact (:=) filtering on a singular string field must honor token
    // positions even when a token (here "Cardiology") repeats in the value:
    // only a document whose full field value equals the filter string matches.
    std::vector<field> fields = {field("name", field_types::STRING, true)};

    Collection* coll1 = collectionManager.create_collection(
        "coll1", 1, fields, "", 0, "", {}, {"."}
    ).get();

    // Seed four documents whose names overlap on repeated tokens; ids are
    // their insertion order ("0".."3").
    const std::vector<std::string> names = {
        "Cardiology - Interventional Cardiology",
        "Cardiology - Interventional",
        "Cardiology - Interventional Cardiology Department",
        "Interventional Cardiology - Interventional Cardiology",
    };

    for(size_t i = 0; i < names.size(); i++) {
        nlohmann::json doc;
        doc["id"] = std::to_string(i);
        doc["name"] = names[i];
        ASSERT_TRUE(coll1->add(doc.dump()).ok());
    }

    // Full-value exact match: only doc 0 qualifies (doc 2 has a trailing
    // token, doc 3 has a leading one).
    auto res = coll1->search("*", {},"name:=Cardiology - Interventional Cardiology", {}, {}, {0}, 10,
                             1, FREQUENCY, {false}).get();
    ASSERT_EQ(1, res["hits"].size());
    ASSERT_EQ("0", res["hits"][0]["document"]["id"].get<std::string>());

    // Shorter exact value: only doc 1's value is exactly this string.
    res = coll1->search("*", {},"name:=Cardiology - Interventional", {}, {}, {0}, 10,
                        1, FREQUENCY, {false}).get();
    ASSERT_EQ(1, res["hits"].size());
    ASSERT_EQ("1", res["hits"][0]["document"]["id"].get<std::string>());

    // A mid-value token sequence is NOT an exact match for any document.
    res = coll1->search("*", {},"name:=Interventional Cardiology", {}, {}, {0}, 10,
                        1, FREQUENCY, {false}).get();
    ASSERT_EQ(0, res["hits"].size());

    // A single token present in every document still matches none exactly.
    res = coll1->search("*", {},"name:=Cardiology", {}, {}, {0}, 10,
                        1, FREQUENCY, {false}).get();
    ASSERT_EQ(0, res["hits"].size());

    collectionManager.drop_collection("coll1");
}
|
||||
TEST_F(CollectionFilteringTest, ExactFilteringRepeatingTokensArrayField) {
    // Same exact (:=) filtering expectations as the singular-field test, but
    // against a STRING_ARRAY field: each array element is matched as a whole
    // value, and repeated tokens must not produce false positives.
    std::vector<field> fields = {field("name", field_types::STRING_ARRAY, true)};

    Collection* coll1 = collectionManager.create_collection(
        "coll1", 1, fields, "", 0, "", {}, {"."}
    ).get();

    // Seed four documents, one single-element name array each; ids are their
    // insertion order ("0".."3").
    const std::vector<std::string> names = {
        "Cardiology - Interventional Cardiology",
        "Cardiology - Interventional",
        "Cardiology - Interventional Cardiology Department",
        "Interventional Cardiology - Interventional Cardiology",
    };

    for(size_t i = 0; i < names.size(); i++) {
        nlohmann::json doc;
        doc["id"] = std::to_string(i);
        doc["name"] = nlohmann::json::array({names[i]});
        ASSERT_TRUE(coll1->add(doc.dump()).ok());
    }

    // Full-value exact match: only doc 0's array element equals the filter.
    auto res = coll1->search("*", {},"name:=Cardiology - Interventional Cardiology", {}, {}, {0}, 10,
                             1, FREQUENCY, {false}).get();
    ASSERT_EQ(1, res["hits"].size());
    ASSERT_EQ("0", res["hits"][0]["document"]["id"].get<std::string>());

    // Shorter exact value: only doc 1 matches.
    res = coll1->search("*", {},"name:=Cardiology - Interventional", {}, {}, {0}, 10,
                        1, FREQUENCY, {false}).get();
    ASSERT_EQ(1, res["hits"].size());
    ASSERT_EQ("1", res["hits"][0]["document"]["id"].get<std::string>());

    // A mid-value token sequence must not exact-match any element.
    res = coll1->search("*", {},"name:=Interventional Cardiology", {}, {}, {0}, 10,
                        1, FREQUENCY, {false}).get();
    ASSERT_EQ(0, res["hits"].size());

    // A single shared token must not exact-match any element either.
    res = coll1->search("*", {},"name:=Cardiology", {}, {}, {0}, 10,
                        1, FREQUENCY, {false}).get();
    ASSERT_EQ(0, res["hits"].size());

    collectionManager.drop_collection("coll1");
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user