From ce7b6e12e996709ade8c2d1cb03227c955fd9424 Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Fri, 27 Aug 2021 20:52:51 +0530
Subject: [PATCH] Prioritize records with a field containing all tokens in the
 query.

---
 src/index.cpp            |  6 +++---
 test/collection_test.cpp | 17 ++++++++++++++---
 2 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/src/index.cpp b/src/index.cpp
index 467ff762..ac0f953f 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -945,7 +945,7 @@ void Index::search_candidates(const uint8_t & field_id, bool field_is_array,
             std::string qtok(reinterpret_cast<char*>(qleaf->key),qleaf->key_len - 1);
             fullq << qtok << " ";
         }
-        LOG(INFO) << "field: " << size_t(field_id) << ", query: " << fullq.str();*/
+        LOG(INFO) << "field: " << size_t(field_id) << ", query: " << fullq.str() << ", total_cost: " << total_cost;*/
 
         // Prepare excluded document IDs that we can later remove from the result set
         uint32_t* excluded_result_ids = nullptr;
@@ -1803,8 +1803,8 @@ void Index::search(const std::vector<query_tokens_t>& field_query_tokens,
             max_weighted_tokens_match = std::min<uint64_t>(255, max_weighted_tokens_match);
 
             uint64_t aggregated_score = (
-                //(exact_match_fields << 48)  |       // number of fields that contain *all tokens* in the query
-                (verbatim_match_fields << 48)  |      // field value *exactly* same as query tokens
+                (verbatim_match_fields << 56)  |      // field value *exactly* same as query tokens
+                (exact_match_fields << 48)  |         // number of fields that contain *all tokens* in the query
                 (max_weighted_tokens_match << 40) |   // weighted max number of tokens matched in a field
                 (uniq_tokens_found << 32)   |         // number of unique tokens found across fields including typos
                 ((255 - min_typos) << 24)   |         // minimum typo cost across all fields
diff --git a/test/collection_test.cpp b/test/collection_test.cpp
index 187845df..1811c45a 100644
--- a/test/collection_test.cpp
+++ b/test/collection_test.cpp
@@ -3014,7 +3014,7 @@ TEST_F(CollectionTest, MultiFieldRelevance2) {
     ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
     ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());
 
-    // change weights to favor artist
+    // weights that favor artist do not change the order: title still wins because it contains all tokens of the query
 
     results = coll1->search("on a jetplane",
                             {"title", "artist"}, "", {}, {}, {0}, 10, 1, FREQUENCY,
@@ -3022,8 +3022,8 @@ TEST_F(CollectionTest, MultiFieldRelevance2) {
                             spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
                             "<mark>", "</mark>", {1, 4}).get();
 
-    ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
-    ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
+    ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
+    ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());
 
     // use same weights
 
@@ -3036,6 +3036,17 @@ TEST_F(CollectionTest, MultiFieldRelevance2) {
     ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
     ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());
 
+    // when no single field contains all tokens of the query, the weights favoring artist decide the order
+
+    results = coll1->search("on a helicopter",
+                            {"title", "artist"}, "", {}, {}, {0}, 10, 1, FREQUENCY,
+                            {true}, 10, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
+                            "<mark>", "</mark>", {1, 4}).get();
+
+    ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
+    ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
+
     collectionManager.drop_collection("coll1");
 }