Parse text match info based on match type.

2025-05-19 05:08:43 +08:00 · 2023-01-09 13:16:30 +05:30 · 2023-01-09 13:16:30 +05:30 · 0fc0f80d21
commit 0fc0f80d21
parent 10fa7a8fa4
3 changed files with 48 additions and 10 deletions
--- a/include/collection.h
+++ b/include/collection.h
@ -213,7 +213,7 @@ private:
                                  std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
                                  std::vector<uint32_t>& excluded_ids) const;

-    void populate_text_match_info(nlohmann::json& info, uint64_t match_score) const;
+    void populate_text_match_info(nlohmann::json& info, uint64_t match_score, const text_match_type_t match_type) const;

    static void remove_flat_fields(nlohmann::json& document);

@ -249,6 +249,8 @@ private:
                                           bool enable_nested_fields,
                                           std::vector<field>& new_fields);

+    static uint64_t extract_bits(uint64_t value, unsigned lsb_offset, unsigned n);
+
 public:

    enum {MAX_ARRAY_MATCHES = 5};
--- a/src/collection.cpp
+++ b/src/collection.cpp
@ -1635,7 +1635,7 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,

                wrapper_doc["text_match_info"] = nlohmann::json::object();
                populate_text_match_info(wrapper_doc["text_match_info"],
-                                         field_order_kv->scores[field_order_kv->match_score_index]);
+                                         field_order_kv->scores[field_order_kv->match_score_index], match_type);
            }

            nlohmann::json geo_distances;
@ -1945,18 +1945,44 @@ void Collection::process_search_field_weights(const std::vector<std::string>& ra
    }
 }

-void Collection::populate_text_match_info(nlohmann::json& info, uint64_t match_score) const {
-    // [ sign | tokens_matched | best_field_score | best_field_weight | num_field_matches ]
-    // [  1   |       4        |        48       |       8            |         3         ]  (64 bits)
+// lsb_offset is zero-based and inclusive
+uint64_t Collection::extract_bits(uint64_t value, unsigned lsb_offset, unsigned n) {
+    const uint64_t max_n = CHAR_BIT * sizeof(uint64_t);
+    if (lsb_offset >= max_n) {
+        return 0;
+    }
+    value >>= lsb_offset;
+    if (n >= max_n) {
+        return value;
+    }
+    const uint64_t mask = ((uint64_t(1)) << n) - 1; /* n '1's */
+    return value & mask;
+}

-    // 0 0001 000000000010000000111111111011001000000000100000 00000110 011
+void Collection::populate_text_match_info(nlohmann::json& info, uint64_t match_score,
+                                          const text_match_type_t match_type) const {
+
+    // MAX_SCORE
+    // [ sign | tokens_matched | max_field_score | max_field_weight | num_matching_fields ]
+    // [   1  |        4       |        48       |       8          |         3           ]  (64 bits)
+
+    // MAX_WEIGHT
+    // [ sign | tokens_matched | max_field_weight | max_field_score  | num_matching_fields ]
+    // [   1  |        4       |        8         |      48          |         3           ]  (64 bits)

    info["score"] = std::to_string(match_score);

-    info["tokens_matched"] = (match_score >> 59);
-    info["best_field_score"] = std::to_string((match_score << 5) >> (8 + 3 + 5));
-    info["best_field_weight"] = ((match_score << 53) >> (3 + 53));
-    info["fields_matched"] = ((match_score << 61) >> (61));
+    if(match_type == max_score) {
+        info["tokens_matched"] = extract_bits(match_score, 59, 4);
+        info["best_field_score"] = std::to_string(extract_bits(match_score, 11, 48));
+        info["best_field_weight"] = extract_bits(match_score, 3, 8);
+        info["fields_matched"] = extract_bits(match_score, 0, 3);
+    } else {
+        info["tokens_matched"] = extract_bits(match_score, 59, 4);
+        info["best_field_weight"] = extract_bits(match_score, 51, 8);
+        info["best_field_score"] = std::to_string(extract_bits(match_score, 3, 48));
+        info["fields_matched"] = extract_bits(match_score, 0, 3);
+    }
 }

 void Collection::process_highlight_fields(const std::vector<search_field_t>& search_fields,
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@ -1699,6 +1699,16 @@ TEST_F(CollectionSpecificMoreTest, WeightTakingPrecendeceOverMatch) {
    ASSERT_EQ(2, res["hits"].size());
    ASSERT_EQ("0", res["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("1", res["hits"][1]["document"]["id"].get<std::string>());
+
+    ASSERT_EQ("1108091338752", res["hits"][0]["text_match_info"]["best_field_score"].get<std::string>());
+    ASSERT_EQ(15, res["hits"][0]["text_match_info"]["best_field_weight"].get<size_t>());
+    ASSERT_EQ(2, res["hits"][0]["text_match_info"]["fields_matched"].get<size_t>());
+    ASSERT_EQ(2, res["hits"][0]["text_match_info"]["tokens_matched"].get<size_t>());
+
+    ASSERT_EQ("2211897868288", res["hits"][1]["text_match_info"]["best_field_score"].get<std::string>());
+    ASSERT_EQ(14, res["hits"][1]["text_match_info"]["best_field_weight"].get<size_t>());
+    ASSERT_EQ(1, res["hits"][1]["text_match_info"]["fields_matched"].get<size_t>());
+    ASSERT_EQ(2, res["hits"][1]["text_match_info"]["tokens_matched"].get<size_t>());
 }

 TEST_F(CollectionSpecificMoreTest, HighlightOnFieldNameWithDot) {