From 0fc0f80d2181eebc176fbe9fc7b26e8d92be71ab Mon Sep 17 00:00:00 2001
From: Kishore Nallan <kishorenc@gmail.com>
Date: Mon, 9 Jan 2023 13:16:30 +0530
Subject: [PATCH] Parse text match info based on match type.

---
 include/collection.h                   |  4 ++-
 src/collection.cpp                     | 44 ++++++++++++++++++++------
 test/collection_specific_more_test.cpp | 10 ++++++
 3 files changed, 48 insertions(+), 10 deletions(-)
diff --git a/include/collection.h b/include/collection.h
index 0680350b..171b1457 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -213,7 +213,7 @@ private:
                                   std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
                                   std::vector<uint32_t>& excluded_ids) const;
 
-    void populate_text_match_info(nlohmann::json& info, uint64_t match_score) const;
+    void populate_text_match_info(nlohmann::json& info, uint64_t match_score, const text_match_type_t match_type) const;
 
     static void remove_flat_fields(nlohmann::json& document);
 
@@ -249,6 +249,8 @@ private:
                                            bool enable_nested_fields,
                                            std::vector<field>& new_fields);
 
+    static uint64_t extract_bits(uint64_t value, unsigned lsb_offset, unsigned n);
+
 public:
 
     enum {MAX_ARRAY_MATCHES = 5};
diff --git a/src/collection.cpp b/src/collection.cpp
index db6e63bd..00e3dd86 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -1635,7 +1635,7 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
 
                 wrapper_doc["text_match_info"] = nlohmann::json::object();
                 populate_text_match_info(wrapper_doc["text_match_info"],
-                                         field_order_kv->scores[field_order_kv->match_score_index]);
+                                         field_order_kv->scores[field_order_kv->match_score_index], match_type);
             }
 
             nlohmann::json geo_distances;
@@ -1945,18 +1945,44 @@ void Collection::process_search_field_weights(const std::vector<std::string>& ra
     }
 }
 
-void Collection::populate_text_match_info(nlohmann::json& info, uint64_t match_score) const {
-    // [ sign | tokens_matched | best_field_score | best_field_weight | num_field_matches ]
-    // [  1   |       4        |        48       |       8            |         3         ]  (64 bits)
+// lsb_offset is zero-based and inclusive
+uint64_t Collection::extract_bits(uint64_t value, unsigned lsb_offset, unsigned n) {
+    const uint64_t max_n = CHAR_BIT * sizeof(uint64_t);
+    if (lsb_offset >= max_n) {
+        return 0;
+    }
+    value >>= lsb_offset;
+    if (n >= max_n) {
+        return value;
+    }
+    const uint64_t mask = ((uint64_t(1)) << n) - 1; /* n '1's */
+    return value & mask;
+}
 
-    // 0 0001 000000000010000000111111111011001000000000100000 00000110 011
+void Collection::populate_text_match_info(nlohmann::json& info, uint64_t match_score,
+                                          const text_match_type_t match_type) const {
+
+    // MAX_SCORE
+    // [ sign | tokens_matched | max_field_score | max_field_weight | num_matching_fields ]
+    // [   1  |        4       |        48       |       8          |         3           ]  (64 bits)
+
+    // MAX_WEIGHT
+    // [ sign | tokens_matched | max_field_weight | max_field_score  | num_matching_fields ]
+    // [   1  |        4       |        8         |      48          |         3           ]  (64 bits)
 
     info["score"] = std::to_string(match_score);
 
-    info["tokens_matched"] = (match_score >> 59);
-    info["best_field_score"] = std::to_string((match_score << 5) >> (8 + 3 + 5));
-    info["best_field_weight"] = ((match_score << 53) >> (3 + 53));
-    info["fields_matched"] = ((match_score << 61) >> (61));
+    if(match_type == max_score) {
+        info["tokens_matched"] = extract_bits(match_score, 59, 4);
+        info["best_field_score"] = std::to_string(extract_bits(match_score, 11, 48));
+        info["best_field_weight"] = extract_bits(match_score, 3, 8);
+        info["fields_matched"] = extract_bits(match_score, 0, 3);
+    } else {
+        info["tokens_matched"] = extract_bits(match_score, 59, 4);
+        info["best_field_weight"] = extract_bits(match_score, 51, 8);
+        info["best_field_score"] = std::to_string(extract_bits(match_score, 3, 48));
+        info["fields_matched"] = extract_bits(match_score, 0, 3);
+    }
 }
 
 void Collection::process_highlight_fields(const std::vector<search_field_t>& search_fields,
diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp
index 1be95b1b..57769205 100644
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@@ -1699,6 +1699,16 @@ TEST_F(CollectionSpecificMoreTest, WeightTakingPrecendeceOverMatch) {
     ASSERT_EQ(2, res["hits"].size());
     ASSERT_EQ("0", res["hits"][0]["document"]["id"].get<std::string>());
     ASSERT_EQ("1", res["hits"][1]["document"]["id"].get<std::string>());
+
+    ASSERT_EQ("1108091338752", res["hits"][0]["text_match_info"]["best_field_score"].get<std::string>());
+    ASSERT_EQ(15, res["hits"][0]["text_match_info"]["best_field_weight"].get<size_t>());
+    ASSERT_EQ(2, res["hits"][0]["text_match_info"]["fields_matched"].get<size_t>());
+    ASSERT_EQ(2, res["hits"][0]["text_match_info"]["tokens_matched"].get<size_t>());
+
+    ASSERT_EQ("2211897868288", res["hits"][1]["text_match_info"]["best_field_score"].get<std::string>());
+    ASSERT_EQ(14, res["hits"][1]["text_match_info"]["best_field_weight"].get<size_t>());
+    ASSERT_EQ(1, res["hits"][1]["text_match_info"]["fields_matched"].get<size_t>());
+    ASSERT_EQ(2, res["hits"][1]["text_match_info"]["tokens_matched"].get<size_t>());
 }
 
 TEST_F(CollectionSpecificMoreTest, HighlightOnFieldNameWithDot) {