From 0fc0f80d2181eebc176fbe9fc7b26e8d92be71ab Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Mon, 9 Jan 2023 13:16:30 +0530 Subject: [PATCH] Parse text match info based on match type. --- include/collection.h | 4 ++- src/collection.cpp | 44 ++++++++++++++++++++------ test/collection_specific_more_test.cpp | 10 ++++++ 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/include/collection.h b/include/collection.h index 0680350b..171b1457 100644 --- a/include/collection.h +++ b/include/collection.h @@ -213,7 +213,7 @@ private: std::vector>& included_ids, std::vector& excluded_ids) const; - void populate_text_match_info(nlohmann::json& info, uint64_t match_score) const; + void populate_text_match_info(nlohmann::json& info, uint64_t match_score, const text_match_type_t match_type) const; static void remove_flat_fields(nlohmann::json& document); @@ -249,6 +249,8 @@ private: bool enable_nested_fields, std::vector& new_fields); + static uint64_t extract_bits(uint64_t value, unsigned lsb_offset, unsigned n); + public: enum {MAX_ARRAY_MATCHES = 5}; diff --git a/src/collection.cpp b/src/collection.cpp index db6e63bd..00e3dd86 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -1635,7 +1635,7 @@ Option Collection::search(const std::string & raw_query, wrapper_doc["text_match_info"] = nlohmann::json::object(); populate_text_match_info(wrapper_doc["text_match_info"], - field_order_kv->scores[field_order_kv->match_score_index]); + field_order_kv->scores[field_order_kv->match_score_index], match_type); } nlohmann::json geo_distances; @@ -1945,18 +1945,44 @@ void Collection::process_search_field_weights(const std::vector& ra } } -void Collection::populate_text_match_info(nlohmann::json& info, uint64_t match_score) const { - // [ sign | tokens_matched | best_field_score | best_field_weight | num_field_matches ] - // [ 1 | 4 | 48 | 8 | 3 ] (64 bits) +// lsb_offset is zero-based and inclusive +uint64_t Collection::extract_bits(uint64_t value, unsigned lsb_offset, unsigned n) { + const uint64_t max_n = CHAR_BIT * sizeof(uint64_t); + if (lsb_offset >= max_n) { + return 0; + } + value >>= lsb_offset; + if (n >= max_n) { + return value; + } + const uint64_t mask = ((uint64_t(1)) << n) - 1; /* n '1's */ + return value & mask; +} - // 0 0001 000000000010000000111111111011001000000000100000 00000110 011 +void Collection::populate_text_match_info(nlohmann::json& info, uint64_t match_score, + const text_match_type_t match_type) const { + + // MAX_SCORE + // [ sign | tokens_matched | max_field_score | max_field_weight | num_matching_fields ] + // [ 1 | 4 | 48 | 8 | 3 ] (64 bits) + + // MAX_WEIGHT + // [ sign | tokens_matched | max_field_weight | max_field_score | num_matching_fields ] + // [ 1 | 4 | 8 | 48 | 3 ] (64 bits) info["score"] = std::to_string(match_score); - info["tokens_matched"] = (match_score >> 59); - info["best_field_score"] = std::to_string((match_score << 5) >> (8 + 3 + 5)); - info["best_field_weight"] = ((match_score << 53) >> (3 + 53)); - info["fields_matched"] = ((match_score << 61) >> (61)); + if(match_type == max_score) { + info["tokens_matched"] = extract_bits(match_score, 59, 4); + info["best_field_score"] = std::to_string(extract_bits(match_score, 11, 48)); + info["best_field_weight"] = extract_bits(match_score, 3, 8); + info["fields_matched"] = extract_bits(match_score, 0, 3); + } else { + info["tokens_matched"] = extract_bits(match_score, 59, 4); + info["best_field_weight"] = extract_bits(match_score, 51, 8); + info["best_field_score"] = std::to_string(extract_bits(match_score, 3, 48)); + info["fields_matched"] = extract_bits(match_score, 0, 3); + } } void Collection::process_highlight_fields(const std::vector& search_fields, diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp index 1be95b1b..57769205 100644 --- a/test/collection_specific_more_test.cpp +++ b/test/collection_specific_more_test.cpp @@ -1699,6 +1699,16 @@ TEST_F(CollectionSpecificMoreTest, WeightTakingPrecendeceOverMatch) { ASSERT_EQ(2, res["hits"].size()); ASSERT_EQ("0", res["hits"][0]["document"]["id"].get()); ASSERT_EQ("1", res["hits"][1]["document"]["id"].get()); + + ASSERT_EQ("1108091338752", res["hits"][0]["text_match_info"]["best_field_score"].get()); + ASSERT_EQ(15, res["hits"][0]["text_match_info"]["best_field_weight"].get()); + ASSERT_EQ(2, res["hits"][0]["text_match_info"]["fields_matched"].get()); + ASSERT_EQ(2, res["hits"][0]["text_match_info"]["tokens_matched"].get()); + + ASSERT_EQ("2211897868288", res["hits"][1]["text_match_info"]["best_field_score"].get()); + ASSERT_EQ(14, res["hits"][1]["text_match_info"]["best_field_weight"].get()); + ASSERT_EQ(1, res["hits"][1]["text_match_info"]["fields_matched"].get()); + ASSERT_EQ(2, res["hits"][1]["text_match_info"]["tokens_matched"].get()); } TEST_F(CollectionSpecificMoreTest, HighlightOnFieldNameWithDot) {