diff --git a/src/index.cpp b/src/index.cpp index 9093f73e..28ab879f 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -1964,6 +1964,11 @@ void Index::search(std::vector& field_query_tokens, // num tokens present across fields including those containing typos int64_t uniq_tokens_found = int64_t(__builtin_popcount(token_bits)) - 1; + // verbatim match should not consider dropped-token cases + if(uniq_tokens_found != field_query_tokens[0].q_include_tokens.size()) { + verbatim_match_fields = 0; + } + verbatim_match_fields = std::min(255, verbatim_match_fields); exact_match_fields = std::min(255, exact_match_fields); max_weighted_tokens_match = std::min(255, max_weighted_tokens_match); diff --git a/test/collection_grouping_test.cpp b/test/collection_grouping_test.cpp index 97e56f84..c8e2a8b2 100644 --- a/test/collection_grouping_test.cpp +++ b/test/collection_grouping_test.cpp @@ -307,8 +307,8 @@ TEST_F(CollectionGroupingTest, GroupingWithMultiFieldRelevance) { ASSERT_STREQ("country", results["grouped_hits"][2]["group_key"][0].get().c_str()); ASSERT_EQ(2, results["grouped_hits"][2]["hits"].size()); - ASSERT_STREQ("3", results["grouped_hits"][2]["hits"][0]["document"]["id"].get().c_str()); - ASSERT_STREQ("8", results["grouped_hits"][2]["hits"][1]["document"]["id"].get().c_str()); + ASSERT_STREQ("8", results["grouped_hits"][2]["hits"][0]["document"]["id"].get().c_str()); + ASSERT_STREQ("3", results["grouped_hits"][2]["hits"][1]["document"]["id"].get().c_str()); collectionManager.drop_collection("coll1"); } diff --git a/test/collection_specific_test.cpp b/test/collection_specific_test.cpp index 726624c3..2a7a53b3 100644 --- a/test/collection_specific_test.cpp +++ b/test/collection_specific_test.cpp @@ -1282,8 +1282,8 @@ TEST_F(CollectionSpecificTest, TypoCorrectionWithFaceting) { collectionManager.drop_collection("coll1"); } -TEST_F(CollectionSpecificTest, MultiFieldMatchesShouldBeWeighted) { - // 2 matches on low weighted fields should not overpower a single match on high 
weighted field +TEST_F(CollectionSpecificTest, MultiFieldVerbatimMatchesShouldBeWeighted) { + // 2 exact matches on low weighted fields should not overpower a single exact match on high weighted field std::vector fields = {field("name", field_types::STRING, false), field("category", field_types::STRING, false), field("label", field_types::STRING, false), @@ -1493,3 +1493,69 @@ TEST_F(CollectionSpecificTest, FacetParallelizationVerification) { collectionManager.drop_collection("coll1"); } + +TEST_F(CollectionSpecificTest, VerbatimMatchShouldConsiderTokensMatchedAcrossAllFields) { + // dropped tokens on a single field cannot be deemed as verbatim match + + std::vector fields = {field("name", field_types::STRING, false), + field("brand", field_types::STRING, false), + field("points", field_types::INT32, false),}; + + Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get(); + + nlohmann::json doc1; + doc1["id"] = "0"; + doc1["name"] = "Hamburger"; + doc1["brand"] = "Burger King"; + doc1["points"] = 10; + + nlohmann::json doc2; + doc2["id"] = "1"; + doc2["name"] = "Hamburger Bun"; + doc2["brand"] = "Trader Joe’s"; + doc2["points"] = 5; + + ASSERT_TRUE(coll1->add(doc1.dump()).ok()); + ASSERT_TRUE(coll1->add(doc2.dump()).ok()); + + auto results = coll1->search("hamburger trader", {"name", "brand"}, + "", {}, {}, {0, 0}, 10, + 1, FREQUENCY, {false, false}, + 2, spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 4, "", 10, {}, {}, {}, 0, + "", "", {1, 1}, + 1000, true).get(); + + ASSERT_EQ(2, results["hits"].size()); + ASSERT_EQ("1", results["hits"][0]["document"]["id"].get()); + ASSERT_EQ("0", results["hits"][1]["document"]["id"].get()); + + nlohmann::json doc3; + doc3["id"] = "2"; + doc3["name"] = "Potato Wedges"; + doc3["brand"] = "McDonalds"; + doc3["points"] = 10; + + nlohmann::json doc4; + doc4["id"] = "3"; + doc4["name"] = "Hot Potato Wedges"; + doc4["brand"] = "KFC Inc."; + doc4["points"] = 5; + + 
ASSERT_TRUE(coll1->add(doc3.dump()).ok()); + ASSERT_TRUE(coll1->add(doc4.dump()).ok()); + + results = coll1->search("potato wedges kfc", {"name", "brand"}, + "", {}, {}, {0, 0}, 10, + 1, FREQUENCY, {false, false}, + 2, spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 4, "", 10, {}, {}, {}, 0, + "", "", {1, 1}, + 1000, true).get(); + + ASSERT_EQ(2, results["hits"].size()); + ASSERT_EQ("3", results["hits"][0]["document"]["id"].get()); + ASSERT_EQ("2", results["hits"][1]["document"]["id"].get()); + + collectionManager.drop_collection("coll1"); +}