Add text match modes: max_score and max_weight.

This commit is contained in:
Kishore Nallan 2023-01-04 20:30:30 +05:30
parent f380bd5fa9
commit bc31be874a
8 changed files with 167 additions and 100 deletions

View File

@ -410,7 +410,8 @@ public:
const bool prioritize_token_position = false,
const std::string& vector_query_str = "",
const bool enable_highlight_v1 = true,
const uint64_t search_time_start_us = 0) const;
const uint64_t search_time_start_us = 0,
const text_match_type_t match_type = max_score) const;
Option<bool> get_filter_ids(const std::string & simple_filter_query,
std::vector<std::pair<size_t, uint32_t*>>& index_ids);

View File

@ -88,9 +88,15 @@ enum enable_t {
off
};
// Selects how the per-field text match results of a document are combined
// into the single aggregated text match score used for ranking.
// The enumerator names are also what the `text_match_type` search parameter
// is parsed against, so renaming them changes the accepted request values.
enum text_match_type_t {
// The field with the highest text match score supplies both the score and the
// weight; in the aggregated value the score is placed in higher bits than the weight.
// This is the default used by Collection::search.
max_score,
// The matching field with the highest weight supplies both the weight and the
// score; in the aggregated value the weight is placed in higher bits than the score.
max_weight
};
struct search_args {
std::vector<query_tokens_t> field_query_tokens;
std::vector<search_field_t> search_fields;
const text_match_type_t match_type;
const filter_node_t* filter_tree_root;
std::vector<facet>& facets;
std::vector<std::pair<uint32_t, uint32_t>>& included_ids;
@ -135,6 +141,7 @@ struct search_args {
vector_query_t& vector_query;
search_args(std::vector<query_tokens_t> field_query_tokens, std::vector<search_field_t> search_fields,
const text_match_type_t match_type,
filter_node_t* filter_tree_root, std::vector<facet>& facets,
std::vector<std::pair<uint32_t, uint32_t>>& included_ids, std::vector<uint32_t> excluded_ids,
std::vector<sort_by>& sort_fields_std, facet_query_t facet_query, const std::vector<uint32_t>& num_typos,
@ -148,7 +155,7 @@ struct search_args {
const size_t max_extra_prefix, const size_t max_extra_suffix, const size_t facet_query_num_typos,
const bool filter_curated_hits, const enable_t split_join_tokens, vector_query_t& vector_query) :
field_query_tokens(field_query_tokens),
search_fields(search_fields), filter_tree_root(filter_tree_root), facets(facets),
search_fields(search_fields), match_type(match_type), filter_tree_root(filter_tree_root), facets(facets),
included_ids(included_ids), excluded_ids(excluded_ids), sort_fields_std(sort_fields_std),
facet_query(facet_query), num_typos(num_typos), max_facet_values(max_facet_values), per_page(per_page),
page(page), token_order(token_order), prefixes(prefixes),
@ -407,6 +414,7 @@ private:
size_t max_candidates) const;
void search_all_candidates(const size_t num_search_fields,
const text_match_type_t match_type,
const std::vector<search_field_t>& the_fields,
const uint32_t* filter_ids, size_t filter_ids_length,
const uint32_t* exclude_token_ids, size_t exclude_token_ids_size,
@ -627,6 +635,7 @@ public:
void run_search(search_args* search_params);
void search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,
const text_match_type_t match_type,
filter_node_t const* const& filter_tree_root, std::vector<facet>& facets, facet_query_t& facet_query,
const std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
const std::vector<uint32_t>& excluded_ids, std::vector<sort_by>& sort_fields_std,
@ -758,6 +767,7 @@ public:
spp::sparse_hash_set<uint64_t>& groups_processed) const;
void do_synonym_search(const std::vector<search_field_t>& the_fields,
const text_match_type_t match_type,
filter_node_t const* const& filter_tree_root,
const std::map<size_t, std::map<size_t, uint32_t>>& included_ids_map,
const std::vector<sort_by>& sort_fields_std, Topster* curated_topster,
@ -790,6 +800,7 @@ public:
void fuzzy_search_fields(const std::vector<search_field_t>& the_fields,
const std::vector<token_t>& query_tokens,
const text_match_type_t match_type,
const bool dropped_tokens,
const uint32_t* exclude_token_ids,
size_t exclude_token_ids_size,
@ -832,6 +843,7 @@ public:
const std::vector<bool>& prefixes,
const std::vector<search_field_t>& the_fields,
const size_t num_search_fields,
const text_match_type_t match_type,
const std::vector<sort_by>& sort_fields,
Topster* topster,
spp::sparse_hash_set<uint64_t>& groups_processed,

View File

@ -7,11 +7,9 @@
#include <unordered_map>
struct KV {
uint8_t field_id{};
int8_t match_score_index{};
uint16_t query_index{};
uint16_t array_index{};
uint32_t token_bits{};
uint64_t key{};
uint64_t distinct_key{};
int64_t scores[3]{}; // match score + 2 custom attributes
@ -19,10 +17,8 @@ struct KV {
// to be used only in final aggregation
uint64_t* query_indices = nullptr;
KV(uint8_t field_id, uint16_t queryIndex, uint32_t token_bits, uint64_t key, uint64_t distinct_key,
uint8_t match_score_index, const int64_t *scores):
field_id(field_id), match_score_index(match_score_index),
query_index(queryIndex), array_index(0), token_bits(token_bits), key(key),
KV(uint16_t queryIndex, uint64_t key, uint64_t distinct_key, uint8_t match_score_index, const int64_t *scores):
match_score_index(match_score_index), query_index(queryIndex), array_index(0), key(key),
distinct_key(distinct_key) {
this->scores[0] = scores[0];
this->scores[1] = scores[1];
@ -33,8 +29,8 @@ struct KV {
KV(KV& kv) = default;
KV(KV&& kv) noexcept : field_id(kv.field_id), match_score_index(kv.match_score_index),
query_index(kv.query_index), array_index(kv.array_index), token_bits(kv.token_bits),
KV(KV&& kv) noexcept : match_score_index(kv.match_score_index),
query_index(kv.query_index), array_index(kv.array_index),
key(kv.key), distinct_key(kv.distinct_key) {
scores[0] = kv.scores[0];
@ -47,11 +43,9 @@ struct KV {
KV& operator=(KV&& kv) noexcept {
if (this != &kv) {
field_id = kv.field_id;
match_score_index = kv.match_score_index;
query_index = kv.query_index;
array_index = kv.array_index;
token_bits = kv.token_bits;
key = kv.key;
distinct_key = kv.distinct_key;
@ -69,11 +63,9 @@ struct KV {
KV& operator=(KV& kv) noexcept {
if (this != &kv) {
field_id = kv.field_id;
match_score_index = kv.match_score_index;
query_index = kv.query_index;
array_index = kv.array_index;
token_bits = kv.token_bits;
key = kv.key;
distinct_key = kv.distinct_key;
@ -120,11 +112,9 @@ struct Topster {
kvs = new KV*[capacity];
for(size_t i=0; i<capacity; i++) {
data[i].field_id = 0;
data[i].match_score_index = 0;
data[i].query_index = 0;
data[i].array_index = i;
data[i].token_bits = 0;
data[i].key = 0;
data[i].distinct_key = 0;
kvs[i] = &data[i];

View File

@ -868,7 +868,8 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
const bool prioritize_token_position,
const std::string& vector_query_str,
const bool enable_highlight_v1,
const uint64_t search_time_start_us) const {
const uint64_t search_time_start_us,
const text_match_type_t match_type) const {
std::shared_lock lock(mutex);
@ -1297,6 +1298,7 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
size_t index_id = 0;
search_args* search_params = new search_args(field_query_tokens, weighted_search_fields,
match_type,
filter_tree_root, facets, included_ids, excluded_ids,
sort_fields_std, facet_query, num_typos, max_facet_values, max_hits,
per_page, page, token_order, prefixes,

View File

@ -697,6 +697,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
const char *EXHAUSTIVE_SEARCH = "exhaustive_search";
const char *SPLIT_JOIN_TOKENS = "split_join_tokens";
const char *TEXT_MATCH_TYPE = "text_match_type";
const char *ENABLE_HIGHLIGHT_V1 = "enable_highlight_v1";
// enrich params with values from embedded params
@ -776,6 +778,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
size_t max_extra_prefix = INT16_MAX;
size_t max_extra_suffix = INT16_MAX;
bool enable_highlight_v1 = true;
// Start from `max_score` (the same default Collection::search declares) so the
// value is well-defined when the `text_match_type` parameter is absent or
// carries a value that does not name an enumerator. Without an initializer the
// variable would be read with an indeterminate value in those cases.
text_match_type_t match_type = max_score;
std::unordered_map<std::string, size_t*> unsigned_int_values = {
{MIN_LEN_1TYPO, &min_len_1typo},
@ -861,6 +864,13 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
}
}
else if(key == TEXT_MATCH_TYPE) {
// Map the raw request value onto a text_match_type_t enumerator by name.
auto match_op = magic_enum::enum_cast<text_match_type_t>(val);
// NOTE: a value that does not name an enumerator is silently ignored here;
// `match_type` is left untouched and no error is reported to the caller.
if(match_op.has_value()) {
match_type = match_op.value();
}
}
else {
auto find_int_it = unsigned_int_values.find(key);
if(find_int_it != unsigned_int_values.end()) {

View File

@ -1302,6 +1302,7 @@ void Index::aggregate_topster(Topster* agg_topster, Topster* index_topster) {
}
void Index::search_all_candidates(const size_t num_search_fields,
const text_match_type_t match_type,
const std::vector<search_field_t>& the_fields,
const uint32_t* filter_ids, size_t filter_ids_length,
const uint32_t* exclude_token_ids, size_t exclude_token_ids_size,
@ -1366,7 +1367,7 @@ void Index::search_all_candidates(const size_t num_search_fields,
//LOG(INFO) << "field_num_results: " << field_num_results << ", typo_tokens_threshold: " << typo_tokens_threshold;
search_across_fields(query_suggestion, num_typos, prefixes, the_fields, num_search_fields,
search_across_fields(query_suggestion, num_typos, prefixes, the_fields, num_search_fields, match_type,
sort_fields, topster,groups_processed,
searched_queries, qtoken_set, group_limit, group_by_fields,
prioritize_exact_match, prioritize_token_position,
@ -1930,6 +1931,7 @@ void Index::do_filtering_with_lock(uint32_t*& filter_ids,
void Index::run_search(search_args* search_params) {
search(search_params->field_query_tokens,
search_params->search_fields,
search_params->match_type,
search_params->filter_tree_root, search_params->facets, search_params->facet_query,
search_params->included_ids, search_params->excluded_ids,
search_params->sort_fields_std, search_params->num_typos,
@ -1986,9 +1988,7 @@ void Index::collate_included_ids(const std::vector<token_t>& q_included_tokens,
scores[1] = int64_t(1);
scores[2] = int64_t(1);
uint32_t token_bits = 0;
KV kv(0, searched_queries.size(), token_bits, seq_id, distinct_id, 0, scores);
KV kv(searched_queries.size(), seq_id, distinct_id, 0, scores);
curated_topster->add(&kv);
}
}
@ -2388,6 +2388,7 @@ void Index::search_infix(const std::string& query, const std::string& field_name
}
void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,
const text_match_type_t match_type,
filter_node_t const* const& filter_tree_root, std::vector<facet>& facets, facet_query_t& facet_query,
const std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
const std::vector<uint32_t>& excluded_ids, std::vector<sort_by>& sort_fields_std,
@ -2489,7 +2490,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
int64_t match_score_index = -1;
result_ids.push_back(seq_id);
KV kv(field_id, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
topster->add(&kv);
if (result_ids.size() == page * per_page) {
@ -2581,7 +2582,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
//LOG(INFO) << "SEQ_ID: " << seq_id << ", score: " << dist_label.first;
KV kv(0, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
topster->add(&kv);
nearest_ids.push_back(seq_id);
}
@ -2638,7 +2639,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
}
}
fuzzy_search_fields(the_fields, field_query_tokens[0].q_include_tokens, false, excluded_result_ids,
fuzzy_search_fields(the_fields, field_query_tokens[0].q_include_tokens, match_type, false, excluded_result_ids,
excluded_result_ids_size, filter_ids, filter_ids_length, curated_ids_sorted,
sort_fields_std, num_typos, searched_queries, qtoken_set, topster, groups_processed,
all_result_ids, all_result_ids_len, group_limit, group_by_fields, prioritize_exact_match,
@ -2675,7 +2676,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
space_resolved_queries[0][j].size(), 0);
}
fuzzy_search_fields(the_fields, resolved_tokens, false, excluded_result_ids,
fuzzy_search_fields(the_fields, resolved_tokens, match_type, false, excluded_result_ids,
excluded_result_ids_size, filter_ids, filter_ids_length, curated_ids_sorted,
sort_fields_std, num_typos, searched_queries, qtoken_set, topster, groups_processed,
all_result_ids, all_result_ids_len, group_limit, group_by_fields, prioritize_exact_match,
@ -2685,7 +2686,8 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
}
// do synonym based searches
do_synonym_search(the_fields, filter_tree_root, included_ids_map, sort_fields_std, curated_topster, token_order,
do_synonym_search(the_fields, match_type, filter_tree_root, included_ids_map, sort_fields_std,
curated_topster, token_order,
0, group_limit, group_by_fields, prioritize_exact_match, prioritize_token_position,
exhaustive_search, concurrency, prefixes,
min_len_1typo, min_len_2typo, max_candidates, curated_ids, curated_ids_sorted,
@ -2731,7 +2733,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
drop_token_prefixes.push_back(p && prefix_search);
}
fuzzy_search_fields(the_fields, truncated_tokens, true, excluded_result_ids,
fuzzy_search_fields(the_fields, truncated_tokens, match_type, true, excluded_result_ids,
excluded_result_ids_size, filter_ids, filter_ids_length, curated_ids_sorted,
sort_fields_std, num_typos, searched_queries, qtoken_set, topster, groups_processed,
all_result_ids, all_result_ids_len, group_limit, group_by_fields, prioritize_exact_match,
@ -2955,6 +2957,7 @@ void Index::process_curated_ids(const std::vector<std::pair<uint32_t, uint32_t>>
void Index::fuzzy_search_fields(const std::vector<search_field_t>& the_fields,
const std::vector<token_t>& query_tokens,
const text_match_type_t match_type,
const bool dropped_tokens,
const uint32_t* exclude_token_ids,
size_t exclude_token_ids_size,
@ -3260,7 +3263,7 @@ void Index::fuzzy_search_fields(const std::vector<search_field_t>& the_fields,
if(token_candidates_vec.size() == query_tokens.size()) {
std::vector<uint32_t> id_buff;
search_all_candidates(num_search_fields, the_fields, filter_ids, filter_ids_length,
search_all_candidates(num_search_fields, match_type, the_fields, filter_ids, filter_ids_length,
exclude_token_ids, exclude_token_ids_size,
sort_fields, token_candidates_vec, searched_queries, qtoken_set, topster,
groups_processed, all_result_ids, all_result_ids_len,
@ -3410,6 +3413,7 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
const std::vector<bool>& prefixes,
const std::vector<search_field_t>& the_fields,
const size_t num_search_fields,
const text_match_type_t match_type,
const std::vector<sort_by>& sort_fields,
Topster* topster,
spp::sparse_hash_set<uint64_t>& groups_processed,
@ -3519,7 +3523,7 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
}
}
int64_t max_field_match_score = 0, max_field_match_index = 0;
int64_t best_field_match_score = 0, best_field_weight = 0;
uint32_t num_matching_fields = 0;
for(size_t fi = 0; fi < field_to_tokens.size(); fi++) {
@ -3528,10 +3532,12 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
continue;
}
bool field_is_array = search_schema.at(the_fields[fi].name).is_array();
int64_t field_match_score = 0;
const int64_t field_weight = the_fields[fi].weight;
const bool field_is_array = search_schema.at(the_fields[fi].name).is_array();
int64_t field_match_score = 0;
bool single_exact_query_token = false;
if(total_cost == 0 && query_tokens.size() == 1) {
// does this candidate suggestion token match query token exactly?
single_exact_query_token = true;
@ -3543,9 +3549,14 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
prioritize_exact_match, single_exact_query_token, prioritize_token_position,
query_tokens.size(), syn_orig_num_tokens, token_postings);
if(field_match_score > max_field_match_score) {
max_field_match_score = field_match_score;
max_field_match_index = fi;
if(match_type == max_score && field_match_score > best_field_match_score) {
best_field_match_score = field_match_score;
best_field_weight = field_weight;
}
if(match_type == max_weight && field_weight > best_field_weight) {
best_field_weight = field_weight;
best_field_match_score = field_match_score;
}
num_matching_fields++;
@ -3561,7 +3572,7 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
int64_t match_score_index = -1;
compute_sort_scores(sort_fields, sort_order, field_values, geopoint_indices, seq_id, filter_index,
max_field_match_score, scores, match_score_index);
best_field_match_score, scores, match_score_index);
size_t query_len = query_tokens.size();
if(syn_orig_num_tokens != -1) {
@ -3572,26 +3583,39 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
// NOTE: `query_len` is total tokens matched across fields.
// Within a field, only a subset can match
// MAX_SCORE
// [ sign | tokens_matched | max_field_score | max_field_weight | num_matching_fields ]
// [ 1 | 4 | 48 | 8 | 3 ] (64 bits)
auto max_field_weight = std::min<size_t>(FIELD_MAX_WEIGHT, the_fields[max_field_match_index].weight);
// MAX_WEIGHT
// [ sign | tokens_matched | max_field_weight | max_field_score | num_matching_fields ]
// [ 1 | 4 | 8 | 48 | 3 ] (64 bits)
auto max_field_weight = std::min<size_t>(FIELD_MAX_WEIGHT, best_field_weight);
num_matching_fields = std::min<size_t>(7, num_matching_fields);
uint64_t aggregated_score = (int64_t(query_len) << 59) |
(int64_t(max_field_match_score) << 11) |
uint64_t aggregated_score = match_type == max_score ?
((int64_t(query_len) << 59) |
(int64_t(best_field_match_score) << 11) |
(int64_t(max_field_weight) << 3) |
(int64_t(num_matching_fields) << 0);
(int64_t(num_matching_fields) << 0))
:
((int64_t(query_len) << 59) |
(int64_t(max_field_weight) << 51) |
(int64_t(best_field_match_score) << 3) |
(int64_t(num_matching_fields) << 0))
;
/*LOG(INFO) << "seq_id: " << seq_id << ", query_len: " << query_len
<< ", syn_orig_num_tokens: " << syn_orig_num_tokens
<< ", max_field_match_score: " << max_field_match_score
<< ", max_field_match_index: " << max_field_match_index
<< ", field_weight: " << max_field_weight
<< ", best_field_match_score: " << best_field_match_score
<< ", max_field_weight: " << max_field_weight
<< ", num_matching_fields: " << num_matching_fields
<< ", aggregated_score: " << aggregated_score;*/
KV kv(0, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
if(match_score_index != -1) {
kv.scores[match_score_index] = aggregated_score;
}
@ -3949,6 +3973,7 @@ void Index::do_phrase_search(const size_t num_search_fields, const std::vector<s
}
void Index::do_synonym_search(const std::vector<search_field_t>& the_fields,
const text_match_type_t match_type,
filter_node_t const* const& filter_tree_root,
const std::map<size_t, std::map<size_t, uint32_t>>& included_ids_map,
const std::vector<sort_by>& sort_fields_std, Topster* curated_topster,
@ -3977,7 +4002,7 @@ void Index::do_synonym_search(const std::vector<search_field_t>& the_fields,
for (const auto& syn_tokens : q_pos_synonyms) {
query_hashes.clear();
fuzzy_search_fields(the_fields, syn_tokens, false, exclude_token_ids,
fuzzy_search_fields(the_fields, syn_tokens, match_type, false, exclude_token_ids,
exclude_token_ids_size, filter_ids, filter_ids_length, curated_ids_sorted,
sort_fields_std, {0}, searched_queries, qtoken_set, actual_topster, groups_processed,
all_result_ids, all_result_ids_len, group_limit, group_by_fields, prioritize_exact_match,
@ -4062,7 +4087,7 @@ void Index::do_infix_search(const size_t num_search_fields, const std::vector<se
groups_processed.emplace(distinct_id);
}
KV kv(field_id, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
actual_topster->add(&kv);
if(((i + 1) % (1 << 12)) == 0) {
@ -4386,7 +4411,7 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root,
tgroups_processed[thread_id].emplace(distinct_id);
}
KV kv(0, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
topsters[thread_id]->add(&kv);
if(check_for_circuit_break && ((i + 1) % (1 << 15)) == 0) {
@ -4973,7 +4998,7 @@ void Index::score_results(const std::vector<sort_by> & sort_fields, const uint16
}
//LOG(INFO) << "Seq id: " << seq_id << ", match_score: " << match_score;
KV kv(field_id, query_index, token_bits, seq_id, distinct_id, match_score_index, scores);
KV kv(query_index, seq_id, distinct_id, match_score_index, scores);
topster->add(&kv);
//long long int timeNanos = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - begin).count();

View File

@ -1668,6 +1668,39 @@ TEST_F(CollectionSpecificMoreTest, PhraseMatchMultipleFields) {
ASSERT_EQ("0", res["hits"][1]["document"]["id"].get<std::string>());
}
// With `max_weight` text matching, a document that matches in the first
// queried field ("brand") must rank above a document whose match lies only in
// the second queried field ("title").
// NOTE(review): presumably earlier `query_by` fields receive the higher
// weight - confirm against the weight assignment in Collection::search.
TEST_F(CollectionSpecificMoreTest, WeightTakingPrecendeceOverMatch) {
    nlohmann::json schema = R"({
"name": "coll1",
"fields": [
{"name": "brand", "type": "string"},
{"name": "title", "type": "string"}
]
})"_json;

    Collection* collection = collectionManager.create_collection(schema).get();

    // "light" matches the brand, "mayo" matches the title.
    nlohmann::json brand_match_doc;
    brand_match_doc["id"] = "0";
    brand_match_doc["title"] = "Healthy Mayo";
    brand_match_doc["brand"] = "Light Plus";
    ASSERT_TRUE(collection->add(brand_match_doc.dump()).ok());

    // Both query tokens match the title; nothing matches the brand.
    nlohmann::json title_match_doc;
    title_match_doc["id"] = "1";
    title_match_doc["title"] = "Healthy Light Mayo";
    title_match_doc["brand"] = "Vegabond";
    ASSERT_TRUE(collection->add(title_match_doc.dump()).ok());

    // The final argument switches the text match mode to `max_weight`.
    auto results = collection->search("light mayo", {"brand", "title"}, "", {}, {}, {2}, 10, 1, FREQUENCY, {true}, 5,
                                      spp::sparse_hash_set<std::string>(),
                                      spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
                                      "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
                                      4, {off}, 0, 0, 0, 2, false, "", true, 0, max_weight).get();

    // Both documents are returned, with the brand-matching one ranked first.
    ASSERT_EQ(2, results["hits"].size());
    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
}
TEST_F(CollectionSpecificMoreTest, HighlightOnFieldNameWithDot) {
nlohmann::json schema = R"({
"name": "coll1",

View File

@ -8,28 +8,26 @@ TEST(TopsterTest, MaxIntValues) {
Topster topster(5);
struct {
uint8_t field_id;
uint16_t query_index;
uint32_t token_bits;
uint64_t key;
uint64_t match_score;
int64_t primary_attr;
int64_t secondary_attr;
} data[14] = {
{1, 0, 255, 1, 11, 20, 30},
{1, 0, 255, 1, 12, 20, 32},
{1, 0, 255, 2, 4, 20, 30},
{1, 2, 255, 3, 7, 20, 30},
{1, 0, 255, 4, 14, 20, 30},
{1, 1, 255, 5, 9, 20, 30},
{1, 1, 255, 5, 10, 20, 32},
{1, 1, 255, 5, 9, 20, 30},
{1, 0, 255, 6, 6, 20, 30},
{1, 2, 255, 7, 6, 22, 30},
{1, 2, 255, 7, 6, 22, 30},
{1, 1, 255, 8, 9, 20, 30},
{1, 0, 255, 9, 8, 20, 30},
{1, 3, 255, 10, 5, 20, 30},
{0, 1, 11, 20, 30},
{0, 1, 12, 20, 32},
{0, 2, 4, 20, 30},
{2, 3, 7, 20, 30},
{0, 4, 14, 20, 30},
{1, 5, 9, 20, 30},
{1, 5, 10, 20, 32},
{1, 5, 9, 20, 30},
{0, 6, 6, 20, 30},
{2, 7, 6, 22, 30},
{2, 7, 6, 22, 30},
{1, 8, 9, 20, 30},
{0, 9, 8, 20, 30},
{3, 10, 5, 20, 30},
};
for(int i = 0; i < 14; i++) {
@ -38,7 +36,7 @@ TEST(TopsterTest, MaxIntValues) {
scores[1] = data[i].primary_attr;
scores[2] = data[i].secondary_attr;
KV kv(data[i].field_id, data[i].query_index, data[i].token_bits, data[i].key, data[i].key, 0, scores);
KV kv(data[i].query_index, data[i].key, data[i].key, 0, scores);
topster.add(&kv);
}
@ -79,7 +77,7 @@ TEST(TopsterTest, StableSorting) {
for(auto id_score: records) {
int64_t scores[3] = {id_score.second, 0, 0};
KV kv(0, 0, 0, id_score.first, id_score.first, 0, scores);
KV kv(0, id_score.first, id_score.first, 0, scores);
topster1K.add(&kv);
}
@ -96,7 +94,7 @@ TEST(TopsterTest, StableSorting) {
for(auto id_score: records) {
int64_t scores[3] = {id_score.second, 0, 0};
KV kv(0, 0, 0, id_score.first, id_score.first, 0, scores);
KV kv(0, id_score.first, id_score.first, 0, scores);
topster250.add(&kv);
}
@ -111,7 +109,7 @@ TEST(TopsterTest, StableSorting) {
for(auto id_score: records) {
int64_t scores[3] = {id_score.second, 0, 0};
KV kv(0, 0, 0, id_score.first, id_score.first, 0, scores);
KV kv(0, id_score.first, id_score.first, 0, scores);
topster500.add(&kv);
}
@ -126,7 +124,7 @@ TEST(TopsterTest, StableSorting) {
for(auto id_score: records) {
int64_t scores[3] = {id_score.second, 0, 0};
KV kv(0, 0, 0, id_score.first, id_score.first, 0, scores);
KV kv(0, id_score.first, id_score.first, 0, scores);
topster750.add(&kv);
}
@ -141,26 +139,24 @@ TEST(TopsterTest, MaxFloatValues) {
Topster topster(5);
struct {
uint8_t field_id;
uint16_t query_index;
uint32_t token_bits;
uint64_t key;
uint64_t match_score;
float primary_attr;
int64_t secondary_attr;
} data[12] = {
{1, 0, 255, 1, 11, 1.09, 30},
{1, 0, 255, 2, 11, -20, 30},
{1, 2, 255, 3, 11, -20, 30},
{1, 0, 255, 4, 11, 7.812, 30},
{1, 0, 255, 4, 11, 7.912, 30},
{1, 1, 255, 5, 11, 0.0, 34},
{1, 0, 255, 6, 11, -22, 30},
{1, 2, 255, 7, 11, -22, 30},
{1, 1, 255, 8, 11, -9.998, 30},
{1, 1, 255, 8, 11, -9.998, 30},
{1, 0, 255, 9, 11, -9.999, 30},
{1, 3, 255, 10, 11, -20, 30},
{0, 1, 11, 1.09, 30},
{0, 2, 11, -20, 30},
{2, 3, 11, -20, 30},
{0, 4, 11, 7.812, 30},
{0, 4, 11, 7.912, 30},
{1, 5, 11, 0.0, 34},
{0, 6, 11, -22, 30},
{2, 7, 11, -22, 30},
{1, 8, 11, -9.998, 30},
{1, 8, 11, -9.998, 30},
{0, 9, 11, -9.999, 30},
{3, 10, 11, -20, 30},
};
for(int i = 0; i < 12; i++) {
@ -169,7 +165,7 @@ TEST(TopsterTest, MaxFloatValues) {
scores[1] = Index::float_to_int64_t(data[i].primary_attr);
scores[2] = data[i].secondary_attr;
KV kv(data[i].field_id, data[i].query_index, data[i].token_bits, data[i].key, data[i].key, 0, scores);
KV kv(data[i].query_index, data[i].key, data[i].key, 0, scores);
topster.add(&kv);
}
@ -186,28 +182,26 @@ TEST(TopsterTest, DistinctIntValues) {
Topster dist_topster(5, 2);
struct {
uint8_t field_id;
uint16_t query_index;
uint32_t token_bits;
uint64_t distinct_key;
uint64_t match_score;
int64_t primary_attr;
int64_t secondary_attr;
} data[14] = {
{1, 0, 255, 1, 11, 20, 30},
{1, 0, 255, 1, 12, 20, 32},
{1, 0, 255, 2, 4, 20, 30},
{1, 2, 255, 3, 7, 20, 30},
{1, 0, 255, 4, 14, 20, 30},
{1, 1, 255, 5, 9, 20, 30},
{1, 1, 255, 5, 10, 20, 32},
{1, 1, 255, 5, 9, 20, 30},
{1, 0, 255, 6, 6, 20, 30},
{1, 2, 255, 7, 6, 22, 30},
{1, 2, 255, 7, 6, 22, 30},
{1, 1, 255, 8, 9, 20, 30},
{1, 0, 255, 9, 8, 20, 30},
{1, 3, 255, 10, 5, 20, 30},
{0, 1, 11, 20, 30},
{0, 1, 12, 20, 32},
{0, 2, 4, 20, 30},
{2, 3, 7, 20, 30},
{0, 4, 14, 20, 30},
{1, 5, 9, 20, 30},
{1, 5, 10, 20, 32},
{1, 5, 9, 20, 30},
{0, 6, 6, 20, 30},
{2, 7, 6, 22, 30},
{2, 7, 6, 22, 30},
{1, 8, 9, 20, 30},
{0, 9, 8, 20, 30},
{3, 10, 5, 20, 30},
};
for(int i = 0; i < 14; i++) {
@ -216,7 +210,7 @@ TEST(TopsterTest, DistinctIntValues) {
scores[1] = data[i].primary_attr;
scores[2] = data[i].secondary_attr;
KV kv(data[i].field_id, data[i].query_index, data[i].token_bits, i+100, data[i].distinct_key, 0, scores);
KV kv(data[i].query_index, i+100, data[i].distinct_key, 0, scores);
dist_topster.add(&kv);
}