Add text match modes: max_score and max_weight.

2025-05-17 20:22:32 +08:00 · 2023-01-04 20:30:30 +05:30 · 2023-01-04 20:30:30 +05:30 · bc31be874a
commit bc31be874a
parent f380bd5fa9
8 changed files with 167 additions and 100 deletions
--- a/include/collection.h
+++ b/include/collection.h
@ -410,7 +410,8 @@ public:
                                  const bool prioritize_token_position = false,
                                  const std::string& vector_query_str = "",
                                  const bool enable_highlight_v1 = true,
-                                  const uint64_t search_time_start_us = 0) const;
+                                  const uint64_t search_time_start_us = 0,
+                                  const text_match_type_t match_type = max_score) const;

    Option<bool> get_filter_ids(const std::string & simple_filter_query,
                                std::vector<std::pair<size_t, uint32_t*>>& index_ids);
--- a/include/index.h
+++ b/include/index.h
@ -88,9 +88,15 @@ enum enable_t {
    off
 };

+enum text_match_type_t {
+    max_score,   // text match uses the matching field with the highest match score
+    max_weight   // text match uses the matching field with the highest field weight
+};
+
 struct search_args {
    std::vector<query_tokens_t> field_query_tokens;
    std::vector<search_field_t> search_fields;
+    const text_match_type_t match_type;
    const filter_node_t* filter_tree_root;
    std::vector<facet>& facets;
    std::vector<std::pair<uint32_t, uint32_t>>& included_ids;
@ -135,6 +141,7 @@ struct search_args {
    vector_query_t& vector_query;

    search_args(std::vector<query_tokens_t> field_query_tokens, std::vector<search_field_t> search_fields,
+                const text_match_type_t match_type,
                filter_node_t* filter_tree_root, std::vector<facet>& facets,
                std::vector<std::pair<uint32_t, uint32_t>>& included_ids, std::vector<uint32_t> excluded_ids,
                std::vector<sort_by>& sort_fields_std, facet_query_t facet_query, const std::vector<uint32_t>& num_typos,
@ -148,7 +155,7 @@ struct search_args {
                const size_t max_extra_prefix, const size_t max_extra_suffix, const size_t facet_query_num_typos,
                const bool filter_curated_hits, const enable_t split_join_tokens, vector_query_t& vector_query) :
            field_query_tokens(field_query_tokens),
-            search_fields(search_fields), filter_tree_root(filter_tree_root), facets(facets),
+            search_fields(search_fields), match_type(match_type), filter_tree_root(filter_tree_root), facets(facets),
            included_ids(included_ids), excluded_ids(excluded_ids), sort_fields_std(sort_fields_std),
            facet_query(facet_query), num_typos(num_typos), max_facet_values(max_facet_values), per_page(per_page),
            page(page), token_order(token_order), prefixes(prefixes),
@ -407,6 +414,7 @@ private:
                      size_t max_candidates) const;

    void search_all_candidates(const size_t num_search_fields,
+                               const text_match_type_t match_type,
                               const std::vector<search_field_t>& the_fields,
                               const uint32_t* filter_ids, size_t filter_ids_length,
                               const uint32_t* exclude_token_ids, size_t exclude_token_ids_size,
@ -627,6 +635,7 @@ public:
    void run_search(search_args* search_params);

    void search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,
+                const text_match_type_t match_type,
                filter_node_t const* const& filter_tree_root, std::vector<facet>& facets, facet_query_t& facet_query,
                const std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
                const std::vector<uint32_t>& excluded_ids, std::vector<sort_by>& sort_fields_std,
@ -758,6 +767,7 @@ public:
                         spp::sparse_hash_set<uint64_t>& groups_processed) const;

    void do_synonym_search(const std::vector<search_field_t>& the_fields,
+                           const text_match_type_t match_type,
                           filter_node_t const* const& filter_tree_root,
                           const std::map<size_t, std::map<size_t, uint32_t>>& included_ids_map,
                           const std::vector<sort_by>& sort_fields_std, Topster* curated_topster,
@ -790,6 +800,7 @@ public:

    void fuzzy_search_fields(const std::vector<search_field_t>& the_fields,
                             const std::vector<token_t>& query_tokens,
+                             const text_match_type_t match_type,
                             const bool dropped_tokens,
                             const uint32_t* exclude_token_ids,
                             size_t exclude_token_ids_size,
@ -832,6 +843,7 @@ public:
                              const std::vector<bool>& prefixes,
                              const std::vector<search_field_t>& the_fields,
                              const size_t num_search_fields,
+                              const text_match_type_t match_type,
                              const std::vector<sort_by>& sort_fields,
                              Topster* topster,
                              spp::sparse_hash_set<uint64_t>& groups_processed,
--- a/include/topster.h
+++ b/include/topster.h
@ -7,11 +7,9 @@
 #include <unordered_map>

 struct KV {
-    uint8_t field_id{};
    int8_t match_score_index{};
    uint16_t query_index{};
    uint16_t array_index{};
-    uint32_t token_bits{};
    uint64_t key{};
    uint64_t distinct_key{};
    int64_t scores[3]{};  // match score + 2 custom attributes
@ -19,10 +17,8 @@ struct KV {
    // to be used only in final aggregation
    uint64_t* query_indices = nullptr;

-    KV(uint8_t field_id, uint16_t queryIndex, uint32_t token_bits, uint64_t key, uint64_t distinct_key,
-       uint8_t match_score_index, const int64_t *scores):
-            field_id(field_id), match_score_index(match_score_index),
-            query_index(queryIndex), array_index(0), token_bits(token_bits), key(key),
+    KV(uint16_t queryIndex, uint64_t key, uint64_t distinct_key, uint8_t match_score_index, const int64_t *scores):
+            match_score_index(match_score_index), query_index(queryIndex), array_index(0), key(key),
            distinct_key(distinct_key) {
        this->scores[0] = scores[0];
        this->scores[1] = scores[1];
@ -33,8 +29,8 @@ struct KV {

    KV(KV& kv) = default;

-    KV(KV&& kv) noexcept : field_id(kv.field_id), match_score_index(kv.match_score_index),
-                 query_index(kv.query_index), array_index(kv.array_index), token_bits(kv.token_bits),
+    KV(KV&& kv) noexcept : match_score_index(kv.match_score_index),
+                 query_index(kv.query_index), array_index(kv.array_index),
                 key(kv.key), distinct_key(kv.distinct_key) {

        scores[0] = kv.scores[0];
@ -47,11 +43,9 @@ struct KV {

    KV& operator=(KV&& kv) noexcept  {
        if (this != &kv) {
-            field_id = kv.field_id;
            match_score_index = kv.match_score_index;
            query_index = kv.query_index;
            array_index = kv.array_index;
-            token_bits = kv.token_bits;
            key = kv.key;
            distinct_key = kv.distinct_key;

@ -69,11 +63,9 @@ struct KV {

    KV& operator=(KV& kv) noexcept  {
        if (this != &kv) {
-            field_id = kv.field_id;
            match_score_index = kv.match_score_index;
            query_index = kv.query_index;
            array_index = kv.array_index;
-            token_bits = kv.token_bits;
            key = kv.key;
            distinct_key = kv.distinct_key;

@ -120,11 +112,9 @@ struct Topster {
        kvs = new KV*[capacity];

        for(size_t i=0; i<capacity; i++) {
-            data[i].field_id = 0;
            data[i].match_score_index = 0;
            data[i].query_index = 0;
            data[i].array_index = i;
-            data[i].token_bits = 0;
            data[i].key = 0;
            data[i].distinct_key = 0;
            kvs[i] = &data[i];
--- a/src/collection.cpp
+++ b/src/collection.cpp
@ -868,7 +868,8 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
                                  const bool prioritize_token_position,
                                  const std::string& vector_query_str,
                                  const bool enable_highlight_v1,
-                                  const uint64_t search_time_start_us) const {
+                                  const uint64_t search_time_start_us,
+                                  const text_match_type_t match_type) const {

    std::shared_lock lock(mutex);

@ -1297,6 +1298,7 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,

    size_t index_id = 0;
    search_args* search_params = new search_args(field_query_tokens, weighted_search_fields,
+                                                 match_type,
                                                 filter_tree_root, facets, included_ids, excluded_ids,
                                                 sort_fields_std, facet_query, num_typos, max_facet_values, max_hits,
                                                 per_page, page, token_order, prefixes,
--- a/src/collection_manager.cpp
+++ b/src/collection_manager.cpp
@ -697,6 +697,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
    const char *EXHAUSTIVE_SEARCH = "exhaustive_search";
    const char *SPLIT_JOIN_TOKENS = "split_join_tokens";

+    const char *TEXT_MATCH_TYPE = "text_match_type";
+
    const char *ENABLE_HIGHLIGHT_V1 = "enable_highlight_v1";

    // enrich params with values from embedded params
@ -776,6 +778,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
    size_t max_extra_prefix = INT16_MAX;
    size_t max_extra_suffix = INT16_MAX;
    bool enable_highlight_v1 = true;
+    text_match_type_t match_type = max_score;  // default when `text_match_type` is absent or not a valid enum name

    std::unordered_map<std::string, size_t*> unsigned_int_values = {
        {MIN_LEN_1TYPO, &min_len_1typo},
@ -861,6 +864,13 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
            }
        }

+        else if(key == TEXT_MATCH_TYPE) {
+            auto match_op = magic_enum::enum_cast<text_match_type_t>(val);
+            if(match_op.has_value()) {
+                match_type = match_op.value();
+            }
+        }
+
        else {
            auto find_int_it = unsigned_int_values.find(key);
            if(find_int_it != unsigned_int_values.end()) {
--- a/src/index.cpp
+++ b/src/index.cpp
@ -1302,6 +1302,7 @@ void Index::aggregate_topster(Topster* agg_topster, Topster* index_topster) {
 }

 void Index::search_all_candidates(const size_t num_search_fields,
+                                  const text_match_type_t match_type,
                                  const std::vector<search_field_t>& the_fields,
                                  const uint32_t* filter_ids, size_t filter_ids_length,
                                  const uint32_t* exclude_token_ids, size_t exclude_token_ids_size,
@ -1366,7 +1367,7 @@ void Index::search_all_candidates(const size_t num_search_fields,

        //LOG(INFO) << "field_num_results: " << field_num_results << ", typo_tokens_threshold: " << typo_tokens_threshold;

-        search_across_fields(query_suggestion, num_typos, prefixes, the_fields, num_search_fields,
+        search_across_fields(query_suggestion, num_typos, prefixes, the_fields, num_search_fields, match_type,
                             sort_fields, topster,groups_processed,
                             searched_queries, qtoken_set, group_limit, group_by_fields,
                             prioritize_exact_match, prioritize_token_position,
@ -1930,6 +1931,7 @@ void Index::do_filtering_with_lock(uint32_t*& filter_ids,
 void Index::run_search(search_args* search_params) {
    search(search_params->field_query_tokens,
           search_params->search_fields,
+           search_params->match_type,
           search_params->filter_tree_root, search_params->facets, search_params->facet_query,
           search_params->included_ids, search_params->excluded_ids,
           search_params->sort_fields_std, search_params->num_typos,
@ -1986,9 +1988,7 @@ void Index::collate_included_ids(const std::vector<token_t>& q_included_tokens,
            scores[1] = int64_t(1);
            scores[2] = int64_t(1);

-            uint32_t token_bits = 0;
-
-            KV kv(0, searched_queries.size(), token_bits, seq_id, distinct_id, 0, scores);
+            KV kv(searched_queries.size(), seq_id, distinct_id, 0, scores);
            curated_topster->add(&kv);
        }
    }
@ -2388,6 +2388,7 @@ void Index::search_infix(const std::string& query, const std::string& field_name
 }

 void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,
+                   const text_match_type_t match_type,
                   filter_node_t const* const& filter_tree_root, std::vector<facet>& facets, facet_query_t& facet_query,
                   const std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
                   const std::vector<uint32_t>& excluded_ids, std::vector<sort_by>& sort_fields_std,
@ -2489,7 +2490,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
                int64_t match_score_index = -1;

                result_ids.push_back(seq_id);
-                KV kv(field_id, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
+                KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
                topster->add(&kv);

                if (result_ids.size() == page * per_page) {
@ -2581,7 +2582,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v

                //LOG(INFO) << "SEQ_ID: " << seq_id << ", score: " << dist_label.first;

-                KV kv(0, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
+                KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
                topster->add(&kv);
                nearest_ids.push_back(seq_id);
            }
@ -2638,7 +2639,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
            }
        }

-        fuzzy_search_fields(the_fields, field_query_tokens[0].q_include_tokens, false, excluded_result_ids,
+        fuzzy_search_fields(the_fields, field_query_tokens[0].q_include_tokens, match_type, false, excluded_result_ids,
                            excluded_result_ids_size, filter_ids, filter_ids_length, curated_ids_sorted,
                            sort_fields_std, num_typos, searched_queries, qtoken_set, topster, groups_processed,
                            all_result_ids, all_result_ids_len, group_limit, group_by_fields, prioritize_exact_match,
@ -2675,7 +2676,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
                                                 space_resolved_queries[0][j].size(), 0);
                }

-                fuzzy_search_fields(the_fields, resolved_tokens, false, excluded_result_ids,
+                fuzzy_search_fields(the_fields, resolved_tokens, match_type, false, excluded_result_ids,
                                    excluded_result_ids_size, filter_ids, filter_ids_length, curated_ids_sorted,
                                    sort_fields_std, num_typos, searched_queries, qtoken_set, topster, groups_processed,
                                    all_result_ids, all_result_ids_len, group_limit, group_by_fields, prioritize_exact_match,
@ -2685,7 +2686,8 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
        }

        // do synonym based searches
-        do_synonym_search(the_fields, filter_tree_root, included_ids_map, sort_fields_std, curated_topster, token_order,
+        do_synonym_search(the_fields, match_type, filter_tree_root, included_ids_map, sort_fields_std,
+                          curated_topster, token_order,
                          0, group_limit, group_by_fields, prioritize_exact_match, prioritize_token_position,
                          exhaustive_search, concurrency, prefixes,
                          min_len_1typo, min_len_2typo, max_candidates, curated_ids, curated_ids_sorted,
@ -2731,7 +2733,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
                            drop_token_prefixes.push_back(p && prefix_search);
                        }

-                        fuzzy_search_fields(the_fields, truncated_tokens, true, excluded_result_ids,
+                        fuzzy_search_fields(the_fields, truncated_tokens, match_type, true, excluded_result_ids,
                                            excluded_result_ids_size, filter_ids, filter_ids_length, curated_ids_sorted,
                                            sort_fields_std, num_typos, searched_queries, qtoken_set, topster, groups_processed,
                                            all_result_ids, all_result_ids_len, group_limit, group_by_fields, prioritize_exact_match,
@ -2955,6 +2957,7 @@ void Index::process_curated_ids(const std::vector<std::pair<uint32_t, uint32_t>>

 void Index::fuzzy_search_fields(const std::vector<search_field_t>& the_fields,
                                const std::vector<token_t>& query_tokens,
+                                const text_match_type_t match_type,
                                const bool dropped_tokens,
                                const uint32_t* exclude_token_ids,
                                size_t exclude_token_ids_size,
@ -3260,7 +3263,7 @@ void Index::fuzzy_search_fields(const std::vector<search_field_t>& the_fields,

        if(token_candidates_vec.size() == query_tokens.size()) {
            std::vector<uint32_t> id_buff;
-            search_all_candidates(num_search_fields, the_fields, filter_ids, filter_ids_length,
+            search_all_candidates(num_search_fields, match_type, the_fields, filter_ids, filter_ids_length,
                                  exclude_token_ids, exclude_token_ids_size,
                                  sort_fields, token_candidates_vec, searched_queries, qtoken_set, topster,
                                  groups_processed, all_result_ids, all_result_ids_len,
@ -3410,6 +3413,7 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
                                 const std::vector<bool>& prefixes,
                                 const std::vector<search_field_t>& the_fields,
                                 const size_t num_search_fields,
+                                 const text_match_type_t match_type,
                                 const std::vector<sort_by>& sort_fields,
                                 Topster* topster,
                                 spp::sparse_hash_set<uint64_t>& groups_processed,
@ -3519,7 +3523,7 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
            }
        }

-        int64_t max_field_match_score = 0, max_field_match_index = 0;
+        int64_t best_field_match_score = 0, best_field_weight = 0;
        uint32_t num_matching_fields = 0;

        for(size_t fi = 0; fi < field_to_tokens.size(); fi++) {
@ -3528,10 +3532,12 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
                continue;
            }

-            bool field_is_array = search_schema.at(the_fields[fi].name).is_array();
-            int64_t field_match_score = 0;
+            const int64_t field_weight = the_fields[fi].weight;
+            const bool field_is_array = search_schema.at(the_fields[fi].name).is_array();

+            int64_t field_match_score = 0;
            bool single_exact_query_token = false;
+
            if(total_cost == 0 && query_tokens.size() == 1) {
                // does this candidate suggestion token match query token exactly?
                single_exact_query_token = true;
@ -3543,9 +3549,14 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
                           prioritize_exact_match, single_exact_query_token, prioritize_token_position,
                           query_tokens.size(), syn_orig_num_tokens, token_postings);

-            if(field_match_score > max_field_match_score) {
-                max_field_match_score = field_match_score;
-                max_field_match_index = fi;
+            if(match_type == max_score && field_match_score > best_field_match_score) {
+                best_field_match_score = field_match_score;
+                best_field_weight = field_weight;
+            }
+
+            if(match_type == max_weight && field_weight > best_field_weight) {
+                best_field_weight = field_weight;
+                best_field_match_score = field_match_score;
            }

            num_matching_fields++;
@ -3561,7 +3572,7 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
        int64_t match_score_index = -1;

        compute_sort_scores(sort_fields, sort_order, field_values, geopoint_indices, seq_id, filter_index,
-                            max_field_match_score, scores, match_score_index);
+                            best_field_match_score, scores, match_score_index);

        size_t query_len = query_tokens.size();
        if(syn_orig_num_tokens != -1) {
@ -3572,26 +3583,39 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
        // NOTE: `query_len` is total tokens matched across fields.
        // Within a field, only a subset can match

+        // MAX_SCORE
        // [ sign | tokens_matched | max_field_score | max_field_weight | num_matching_fields ]
        // [   1  |        4       |        48       |       8          |         3           ]  (64 bits)

-        auto max_field_weight = std::min<size_t>(FIELD_MAX_WEIGHT, the_fields[max_field_match_index].weight);
+        // MAX_WEIGHT
+        // [ sign | tokens_matched | max_field_weight | max_field_score  | num_matching_fields ]
+        // [   1  |        4       |        8         |      48          |         3           ]  (64 bits)
+
+        auto max_field_weight = std::min<size_t>(FIELD_MAX_WEIGHT, best_field_weight);
        num_matching_fields = std::min<size_t>(7, num_matching_fields);

-        uint64_t aggregated_score = (int64_t(query_len) << 59) |
-                                    (int64_t(max_field_match_score) << 11) |
+        uint64_t aggregated_score = match_type == max_score ?
+                                    ((int64_t(query_len) << 59) |
+                                    (int64_t(best_field_match_score) << 11) |
                                    (int64_t(max_field_weight) << 3) |
-                                    (int64_t(num_matching_fields) << 0);
+                                    (int64_t(num_matching_fields) << 0))
+
+                                    :
+
+                                    ((int64_t(query_len) << 59) |
+                                     (int64_t(max_field_weight) << 51) |
+                                     (int64_t(best_field_match_score) << 3) |
+                                     (int64_t(num_matching_fields) << 0))
+                                    ;

        /*LOG(INFO) << "seq_id: " << seq_id << ", query_len: " << query_len
                  << ", syn_orig_num_tokens: " << syn_orig_num_tokens
-                  << ", max_field_match_score: " << max_field_match_score
-                  << ", max_field_match_index: " << max_field_match_index
-                  << ", field_weight: " << max_field_weight
+                  << ", best_field_match_score: " << best_field_match_score
+                  << ", max_field_weight: " << max_field_weight
                  << ", num_matching_fields: " << num_matching_fields
                  << ", aggregated_score: " << aggregated_score;*/

-        KV kv(0, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
+        KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
        if(match_score_index != -1) {
            kv.scores[match_score_index] = aggregated_score;
        }
@ -3949,6 +3973,7 @@ void Index::do_phrase_search(const size_t num_search_fields, const std::vector<s
 }

 void Index::do_synonym_search(const std::vector<search_field_t>& the_fields,
+                              const text_match_type_t match_type,
                              filter_node_t const* const& filter_tree_root,
                              const std::map<size_t, std::map<size_t, uint32_t>>& included_ids_map,
                              const std::vector<sort_by>& sort_fields_std, Topster* curated_topster,
@ -3977,7 +4002,7 @@ void Index::do_synonym_search(const std::vector<search_field_t>& the_fields,

    for (const auto& syn_tokens : q_pos_synonyms) {
        query_hashes.clear();
-        fuzzy_search_fields(the_fields, syn_tokens, false, exclude_token_ids,
+        fuzzy_search_fields(the_fields, syn_tokens, match_type, false, exclude_token_ids,
                            exclude_token_ids_size, filter_ids, filter_ids_length, curated_ids_sorted,
                            sort_fields_std, {0}, searched_queries, qtoken_set, actual_topster, groups_processed,
                            all_result_ids, all_result_ids_len, group_limit, group_by_fields, prioritize_exact_match,
@ -4062,7 +4087,7 @@ void Index::do_infix_search(const size_t num_search_fields, const std::vector<se
                        groups_processed.emplace(distinct_id);
                    }

-                    KV kv(field_id, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
+                    KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
                    actual_topster->add(&kv);

                    if(((i + 1) % (1 << 12)) == 0) {
@ -4386,7 +4411,7 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root,
                    tgroups_processed[thread_id].emplace(distinct_id);
                }

-                KV kv(0, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
+                KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
                topsters[thread_id]->add(&kv);

                if(check_for_circuit_break && ((i + 1) % (1 << 15)) == 0) {
@ -4973,7 +4998,7 @@ void Index::score_results(const std::vector<sort_by> & sort_fields, const uint16
    }

    //LOG(INFO) << "Seq id: " << seq_id << ", match_score: " << match_score;
-    KV kv(field_id, query_index, token_bits, seq_id, distinct_id, match_score_index, scores);
+    KV kv(query_index, seq_id, distinct_id, match_score_index, scores);
    topster->add(&kv);

    //long long int timeNanos = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - begin).count();
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@ -1668,6 +1668,39 @@ TEST_F(CollectionSpecificMoreTest, PhraseMatchMultipleFields) {
    ASSERT_EQ("0", res["hits"][1]["document"]["id"].get<std::string>());
 }

+TEST_F(CollectionSpecificMoreTest, WeightTakingPrecendeceOverMatch) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "fields": [
+            {"name": "brand", "type": "string"},
+            {"name": "title", "type": "string"}
+        ]
+    })"_json;
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    nlohmann::json doc;
+    doc["id"] = "0";
+    doc["title"] = "Healthy Mayo";  // title contains only "mayo" from the query
+    doc["brand"] = "Light Plus";  // brand contains only "light" from the query
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    doc["id"] = "1";
+    doc["title"] = "Healthy Light Mayo";  // title contains both query tokens
+    doc["brand"] = "Vegabond";  // brand contains neither query token
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    auto res = coll1->search("light mayo", {"brand", "title"}, "", {}, {}, {2}, 10, 1, FREQUENCY, {true}, 5,
+                             spp::sparse_hash_set<std::string>(),
+                             spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
+                             "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                             4, {off}, 0, 0, 0, 2, false, "", true, 0, max_weight).get();  // last argument selects the max_weight match type
+
+    ASSERT_EQ(2, res["hits"].size());
+    ASSERT_EQ("0", res["hits"][0]["document"]["id"].get<std::string>());  // presumably wins via its brand hit, assuming brand (listed first) gets the higher default weight - TODO confirm
+    ASSERT_EQ("1", res["hits"][1]["document"]["id"].get<std::string>());  // matches only in title, so it ranks second under max_weight
+}
+
 TEST_F(CollectionSpecificMoreTest, HighlightOnFieldNameWithDot) {
    nlohmann::json schema = R"({
        "name": "coll1",
--- a/test/topster_test.cpp
+++ b/test/topster_test.cpp
@ -8,28 +8,26 @@ TEST(TopsterTest, MaxIntValues) {
    Topster topster(5);

    struct {
-        uint8_t field_id;
        uint16_t query_index;
-        uint32_t token_bits;
        uint64_t key;
        uint64_t match_score;
        int64_t primary_attr;
        int64_t secondary_attr;
    } data[14] = {
-        {1, 0, 255,  1, 11, 20, 30},
-        {1, 0, 255,  1, 12, 20, 32},
-        {1, 0, 255,  2, 4, 20, 30},
-        {1, 2, 255,  3, 7, 20, 30},
-        {1, 0, 255,  4, 14, 20, 30},
-        {1, 1, 255,  5, 9, 20, 30},
-        {1, 1, 255,  5, 10, 20, 32},
-        {1, 1, 255,  5, 9, 20, 30},
-        {1, 0, 255,  6, 6, 20, 30},
-        {1, 2, 255,  7, 6, 22, 30},
-        {1, 2, 255,  7, 6, 22, 30},
-        {1, 1, 255,  8, 9, 20, 30},
-        {1, 0, 255,  9, 8, 20, 30},
-        {1, 3, 255,  10, 5, 20, 30},
+        {0, 1, 11, 20, 30},
+        {0, 1, 12, 20, 32},
+        {0, 2, 4, 20, 30},
+        {2, 3, 7, 20, 30},
+        {0, 4, 14, 20, 30},
+        {1, 5, 9, 20, 30},
+        {1, 5, 10, 20, 32},
+        {1, 5, 9, 20, 30},
+        {0, 6, 6, 20, 30},
+        {2, 7, 6, 22, 30},
+        {2, 7, 6, 22, 30},
+        {1, 8, 9, 20, 30},
+        {0, 9, 8, 20, 30},
+        {3, 10, 5, 20, 30},
    };

    for(int i = 0; i < 14; i++) {
@ -38,7 +36,7 @@ TEST(TopsterTest, MaxIntValues) {
        scores[1] = data[i].primary_attr;
        scores[2] = data[i].secondary_attr;

-        KV kv(data[i].field_id, data[i].query_index, data[i].token_bits, data[i].key, data[i].key, 0, scores);
+        KV kv(data[i].query_index, data[i].key, data[i].key, 0, scores);
        topster.add(&kv);
    }

@ -79,7 +77,7 @@ TEST(TopsterTest, StableSorting) {

    for(auto id_score: records) {
        int64_t scores[3] = {id_score.second, 0, 0};
-        KV kv(0, 0, 0, id_score.first, id_score.first, 0, scores);
+        KV kv(0, id_score.first, id_score.first, 0, scores);
        topster1K.add(&kv);
    }

@ -96,7 +94,7 @@ TEST(TopsterTest, StableSorting) {

    for(auto id_score: records) {
        int64_t scores[3] = {id_score.second, 0, 0};
-        KV kv(0, 0, 0, id_score.first, id_score.first, 0, scores);
+        KV kv(0, id_score.first, id_score.first, 0, scores);
        topster250.add(&kv);
    }

@ -111,7 +109,7 @@ TEST(TopsterTest, StableSorting) {

    for(auto id_score: records) {
        int64_t scores[3] = {id_score.second, 0, 0};
-        KV kv(0, 0, 0, id_score.first, id_score.first, 0, scores);
+        KV kv(0, id_score.first, id_score.first, 0, scores);
        topster500.add(&kv);
    }

@ -126,7 +124,7 @@ TEST(TopsterTest, StableSorting) {

    for(auto id_score: records) {
        int64_t scores[3] = {id_score.second, 0, 0};
-        KV kv(0, 0, 0, id_score.first, id_score.first, 0, scores);
+        KV kv(0, id_score.first, id_score.first, 0, scores);
        topster750.add(&kv);
    }

@ -141,26 +139,24 @@ TEST(TopsterTest, MaxFloatValues) {
    Topster topster(5);

    struct {
-        uint8_t field_id;
        uint16_t query_index;
-        uint32_t token_bits;
        uint64_t key;
        uint64_t match_score;
        float primary_attr;
        int64_t secondary_attr;
    } data[12] = {
-            {1, 0,  255, 1, 11, 1.09, 30},
-            {1, 0,  255, 2, 11, -20, 30},
-            {1, 2,  255, 3, 11, -20, 30},
-            {1, 0,  255, 4, 11, 7.812, 30},
-            {1, 0,  255, 4, 11, 7.912, 30},
-            {1, 1,  255, 5, 11, 0.0, 34},
-            {1, 0,  255, 6, 11, -22, 30},
-            {1, 2,  255, 7, 11, -22, 30},
-            {1, 1,  255, 8, 11, -9.998, 30},
-            {1, 1,  255, 8, 11, -9.998, 30},
-            {1, 0,  255, 9, 11, -9.999, 30},
-            {1, 3,  255, 10, 11, -20, 30},
+        {0, 1, 11, 1.09, 30},
+        {0, 2, 11, -20, 30},
+        {2, 3, 11, -20, 30},
+        {0, 4, 11, 7.812, 30},
+        {0, 4, 11, 7.912, 30},
+        {1, 5, 11, 0.0, 34},
+        {0, 6, 11, -22, 30},
+        {2, 7, 11, -22, 30},
+        {1, 8, 11, -9.998, 30},
+        {1, 8, 11, -9.998, 30},
+        {0, 9, 11, -9.999, 30},
+        {3, 10, 11, -20, 30},
    };

    for(int i = 0; i < 12; i++) {
@ -169,7 +165,7 @@ TEST(TopsterTest, MaxFloatValues) {
        scores[1] = Index::float_to_int64_t(data[i].primary_attr);
        scores[2] = data[i].secondary_attr;

-        KV kv(data[i].field_id, data[i].query_index, data[i].token_bits, data[i].key, data[i].key, 0, scores);
+        KV kv(data[i].query_index, data[i].key, data[i].key, 0, scores);
        topster.add(&kv);
    }

@ -186,28 +182,26 @@ TEST(TopsterTest, DistinctIntValues) {
    Topster dist_topster(5, 2);

    struct {
-        uint8_t field_id;
        uint16_t query_index;
-        uint32_t token_bits;
        uint64_t distinct_key;
        uint64_t match_score;
        int64_t primary_attr;
        int64_t secondary_attr;
    } data[14] = {
-            {1, 0, 255, 1, 11, 20, 30},
-            {1, 0, 255, 1, 12, 20, 32},
-            {1, 0, 255, 2, 4, 20, 30},
-            {1, 2, 255, 3, 7, 20, 30},
-            {1, 0, 255, 4, 14, 20, 30},
-            {1, 1, 255, 5, 9, 20, 30},
-            {1, 1, 255, 5, 10, 20, 32},
-            {1, 1, 255, 5, 9, 20, 30},
-            {1, 0, 255, 6, 6, 20, 30},
-            {1, 2, 255, 7, 6, 22, 30},
-            {1, 2, 255, 7, 6, 22, 30},
-            {1, 1, 255, 8, 9, 20, 30},
-            {1, 0, 255, 9, 8, 20, 30},
-            {1, 3, 255, 10,  5, 20, 30},
+        {0, 1, 11, 20, 30},
+        {0, 1, 12, 20, 32},
+        {0, 2, 4, 20, 30},
+        {2, 3, 7, 20, 30},
+        {0, 4, 14, 20, 30},
+        {1, 5, 9, 20, 30},
+        {1, 5, 10, 20, 32},
+        {1, 5, 9, 20, 30},
+        {0, 6, 6, 20, 30},
+        {2, 7, 6, 22, 30},
+        {2, 7, 6, 22, 30},
+        {1, 8, 9, 20, 30},
+        {0, 9, 8, 20, 30},
+        {3, 10,  5, 20, 30},
    };

    for(int i = 0; i < 14; i++) {
@ -216,7 +210,7 @@ TEST(TopsterTest, DistinctIntValues) {
        scores[1] = data[i].primary_attr;
        scores[2] = data[i].secondary_attr;

-        KV kv(data[i].field_id, data[i].query_index, data[i].token_bits, i+100, data[i].distinct_key, 0, scores);
+        KV kv(data[i].query_index, i+100, data[i].distinct_key, 0, scores);
        dist_topster.add(&kv);
    }