Parse text match info based on match type.

This commit is contained in:
Kishore Nallan 2023-01-09 13:16:30 +05:30
parent 10fa7a8fa4
commit 0fc0f80d21
3 changed files with 48 additions and 10 deletions

View File

@ -213,7 +213,7 @@ private:
std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
std::vector<uint32_t>& excluded_ids) const;
void populate_text_match_info(nlohmann::json& info, uint64_t match_score) const;
// Decodes the packed 64-bit `match_score` into individual entries of `info`.
// `match_type` selects which bit layout is used to locate the field score and field weight.
void populate_text_match_info(nlohmann::json& info, uint64_t match_score, const text_match_type_t match_type) const;
static void remove_flat_fields(nlohmann::json& document);
@ -249,6 +249,8 @@ private:
bool enable_nested_fields,
std::vector<field>& new_fields);
// Returns the `n`-bit wide field of `value` that starts at the zero-based bit position `lsb_offset`.
static uint64_t extract_bits(uint64_t value, unsigned lsb_offset, unsigned n);
public:
enum {MAX_ARRAY_MATCHES = 5};

View File

@ -1635,7 +1635,7 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
wrapper_doc["text_match_info"] = nlohmann::json::object();
populate_text_match_info(wrapper_doc["text_match_info"],
field_order_kv->scores[field_order_kv->match_score_index]);
field_order_kv->scores[field_order_kv->match_score_index], match_type);
}
nlohmann::json geo_distances;
@ -1945,18 +1945,44 @@ void Collection::process_search_field_weights(const std::vector<std::string>& ra
}
}
void Collection::populate_text_match_info(nlohmann::json& info, uint64_t match_score) const {
// [ sign | tokens_matched | best_field_score | best_field_weight | num_field_matches ]
// [ 1 | 4 | 48 | 8 | 3 ] (64 bits)
// Returns the `n`-bit wide field of `value` whose least significant bit is at `lsb_offset`.
// `lsb_offset` is zero-based and inclusive. An offset at or past the word width yields 0,
// and a width at or past the word width yields everything from `lsb_offset` upwards.
uint64_t Collection::extract_bits(uint64_t value, unsigned lsb_offset, unsigned n) {
    const uint64_t word_bits = sizeof(uint64_t) * CHAR_BIT;

    // Nothing lives beyond the top bit; bail out before performing an out-of-range shift.
    if (lsb_offset >= word_bits) {
        return 0;
    }

    // Bring the requested field down to bit 0.
    const uint64_t shifted = value >> lsb_offset;

    // A full-width (or wider) request needs no masking, and `1 << n` would be out of range there.
    // Otherwise keep only the low `n` bits (n == 0 gives an all-zero mask).
    return (n >= word_bits) ? shifted : (shifted & ((uint64_t(1) << n) - 1));
}
// 0 0001 000000000010000000111111111011001000000000100000 00000110 011
void Collection::populate_text_match_info(nlohmann::json& info, uint64_t match_score,
                                          const text_match_type_t match_type) const {
    // Breaks the packed 64-bit text match score into its components and stores them in `info`.
    // `score` and `best_field_score` are written as decimal strings, the other entries as integers.
    //
    // MAX_SCORE
    // [ sign | tokens_matched | max_field_score | max_field_weight | num_matching_fields ]
    // [  1   |       4        |       48        |        8         |          3          ] (64 bits)
    // MAX_WEIGHT
    // [ sign | tokens_matched | max_field_weight | max_field_score | num_matching_fields ]
    // [  1   |       4        |        8         |       48        |          3          ] (64 bits)

    info["score"] = std::to_string(match_score);

    // These two fields occupy the same bit positions in both layouts.
    info["tokens_matched"] = extract_bits(match_score, 59, 4);
    info["fields_matched"] = extract_bits(match_score, 0, 3);

    // Only the relative position of the field score and the field weight depends on the match type.
    if(match_type == max_score) {
        info["best_field_score"] = std::to_string(extract_bits(match_score, 11, 48));
        info["best_field_weight"] = extract_bits(match_score, 3, 8);
    } else {
        // Any other match type is decoded with the MAX_WEIGHT layout.
        info["best_field_weight"] = extract_bits(match_score, 51, 8);
        info["best_field_score"] = std::to_string(extract_bits(match_score, 3, 48));
    }
}
void Collection::process_highlight_fields(const std::vector<search_field_t>& search_fields,

View File

@ -1699,6 +1699,16 @@ TEST_F(CollectionSpecificMoreTest, WeightTakingPrecendeceOverMatch) {
ASSERT_EQ(2, res["hits"].size());
ASSERT_EQ("0", res["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("1", res["hits"][1]["document"]["id"].get<std::string>());
ASSERT_EQ("1108091338752", res["hits"][0]["text_match_info"]["best_field_score"].get<std::string>());
ASSERT_EQ(15, res["hits"][0]["text_match_info"]["best_field_weight"].get<size_t>());
ASSERT_EQ(2, res["hits"][0]["text_match_info"]["fields_matched"].get<size_t>());
ASSERT_EQ(2, res["hits"][0]["text_match_info"]["tokens_matched"].get<size_t>());
ASSERT_EQ("2211897868288", res["hits"][1]["text_match_info"]["best_field_score"].get<std::string>());
ASSERT_EQ(14, res["hits"][1]["text_match_info"]["best_field_weight"].get<size_t>());
ASSERT_EQ(1, res["hits"][1]["text_match_info"]["fields_matched"].get<size_t>());
ASSERT_EQ(2, res["hits"][1]["text_match_info"]["tokens_matched"].get<size_t>());
}
TEST_F(CollectionSpecificMoreTest, HighlightOnFieldNameWithDot) {