Tweak relevancy scoring.

Kishore Nallan 2022-03-06 18:06:12 +05:30
parent 0341e693d1
commit 12c443e222
6 changed files with 93 additions and 187 deletions
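In broad strokes, the diff below replaces the old multi-signal aggregate (exact-match fields, weighted token matches, minimum typos, total typos and total distances packed into one 64-bit value) with a simpler ranking: each field's match is packed into a single integer ordered by unique tokens matched, typo score, proximity and exact-match flag; the best field's packed score dominates, with the total number of query tokens found and that field's weight as tie-breakers. A minimal sketch of the two packing steps, using hypothetical helper names (the actual change inlines these expressions inside Index::aggregate_and_score_fields):

#include <cstdint>

// Sketch only: helper names are hypothetical; the real code inlines these
// expressions in Index::aggregate_and_score_fields().

// Per-field score. Higher bytes carry the stronger signals, so comparing the
// packed integers compares unique tokens, then typos, then proximity, then
// the exact-match flag.
inline int64_t pack_field_match_score(uint64_t unique_tokens_found,
                                      uint64_t typo_score,
                                      uint64_t proximity_score,
                                      int64_t exact_match_score) {
    return (int64_t(unique_tokens_found) << 32) |
           (int64_t(typo_score)          << 24) |
           (int64_t(proximity_score)     << 16) |
           (int64_t(exact_match_score)   <<  8);
}

// Cross-field aggregation: the single best field dominates, then the number
// of distinct query tokens found across all fields, then that field's weight.
inline uint64_t pack_aggregated_score(int64_t max_field_match_score,
                                      uint32_t num_tokens_found,
                                      uint64_t best_field_weight) {
    return (uint64_t(max_field_match_score) << 16) |
           (uint64_t(num_tokens_found)      <<  8) |
           (uint64_t(best_field_weight)     <<  0);
}

The sentinel top bit in the token_bits initializers is also dropped, so a plain __builtin_popcount(token_bits) can serve as the matched-token count.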

View File

@@ -1025,7 +1025,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
const std::vector<facet_info_t>& facet_infos,
const size_t group_limit, const std::vector<std::string>& group_by_fields,
const uint32_t* result_ids, size_t results_size) const {
// assumed that facet fields have already been validated upstream
for(size_t findex=0; findex < facets.size(); findex++) {
auto& a_facet = facets[findex];
@@ -1139,7 +1139,7 @@ void Index::search_candidates(const uint8_t & field_id, bool field_is_array,
std::vector<art_leaf*> actual_query_suggestion(token_candidates_vec.size());
uint64 qhash;
uint32_t token_bits = (uint32_t(1) << 31); // top most bit set to guarantee atleast 1 bit set
uint32_t token_bits = 0;
uint32_t total_cost = next_suggestion(token_candidates_vec, n, actual_query_suggestion,
query_suggestion, token_bits, qhash);
@@ -1456,7 +1456,7 @@ void Index::do_filtering(uint32_t*& filter_ids, uint32_t& filter_ids_length,
if(f.is_single_geopoint()) {
spp::sparse_hash_map<uint32_t, int64_t>* sort_field_index = sort_index.at(f.name);
for(auto result_id: geo_result_ids) {
// no need to check for existence of `result_id` because of indexer based pre-filtering above
int64_t lat_lng = sort_field_index->at(result_id);
@@ -1703,7 +1703,7 @@ void Index::collate_included_ids(const std::vector<std::string>& q_included_toke
scores[1] = int64_t(1);
scores[2] = int64_t(1);
uint32_t token_bits = (uint32_t(1) << 31);
uint32_t token_bits = 0;
KV kv(field_id, searched_queries.size(), token_bits, seq_id, distinct_id, 0, scores);
curated_topster->add(&kv);
}
@@ -2359,60 +2359,51 @@ void Index::aggregate_and_score_fields(const std::vector<query_tokens_t>& field_
<< "searched_query: " << searched_queries[kvs[kv_i]->query_index][0];*/
}
uint32_t token_bits = (uint32_t(1) << 31); // top most bit set to guarantee atleast 1 bit set
uint64_t total_typos = 0, total_distances = 0, min_typos = 1000;
uint64_t verbatim_match_fields = 0; // field value *exactly* same as query tokens
uint64_t exact_match_fields = 0; // number of fields that contains all of query tokens
uint64_t max_weighted_tokens_match = 0; // weighted max number of tokens matched in a field
uint64_t total_token_matches = 0; // total matches across fields (including fuzzy ones)
uint32_t token_bits = 0;
int64_t max_field_match_score = 0;
size_t max_field_match_index = 0;
//LOG(INFO) << "Init pop count: " << __builtin_popcount(token_bits);
for(size_t i = 0; i < num_search_fields; i++) {
const auto field_id = (uint8_t)(FIELD_LIMIT_NUM - i);
const size_t priority = the_fields[i].priority;
const size_t weight = the_fields[i].weight;
//LOG(INFO) << "--- field index: " << i << ", priority: " << priority;
if(existing_field_kvs.count(field_id) != 0) {
// for existing field, we will simply sum field-wise weighted scores
token_bits |= existing_field_kvs[field_id]->token_bits;
//LOG(INFO) << "existing_field_kvs.count pop count: " << __builtin_popcount(token_bits);
int64_t match_score = existing_field_kvs[field_id]->scores[existing_field_kvs[field_id]->match_score_index];
token_bits |= existing_field_kvs[field_id]->token_bits;
// we will reassemble match score to use unique tokens for cross-field matches
uint64_t tokens_found = ((match_score >> 24) & 0xFF);
uint64_t field_typos = 255 - ((match_score >> 16) & 0xFF);
total_typos += (field_typos + 1) * priority;
total_distances += ((100 - ((match_score >> 8) & 0xFF)) + 1) * priority;
uint64_t typo_score = ((match_score >> 16) & 0xFF);
uint64_t proximity_score = ((match_score >> 8) & 0xFF);
int64_t exact_match_score = match_score & 0xFF;
verbatim_match_fields += (weight * exact_match_score);
uint32_t unique_tokens_found = __builtin_popcount(existing_field_kvs[field_id]->token_bits);
uint64_t unique_tokens_found =
int64_t(__builtin_popcount(existing_field_kvs[field_id]->token_bits)) - 1;
if(field_typos == 0 && unique_tokens_found == field_query_tokens[i].q_include_tokens.size()) {
exact_match_fields += weight;
// exclude dropped-token cases
if(unique_tokens_found != field_query_tokens[0].q_include_tokens.size()) {
exact_match_score = 0;
}
auto weighted_tokens_match = (tokens_found * weight);
if(weighted_tokens_match > max_weighted_tokens_match) {
max_weighted_tokens_match = weighted_tokens_match;
int64_t multi_field_match_score = (int64_t(unique_tokens_found) << 32) |
(int64_t(typo_score) << 24) |
(int64_t(proximity_score) << 16) |
(int64_t(exact_match_score) << 8) |
(int64_t(0) << 0);
if(multi_field_match_score > max_field_match_score) {
max_field_match_score = multi_field_match_score;
max_field_match_index = i;
}
if(field_typos < min_typos) {
min_typos = field_typos;
}
total_token_matches += (weight * tokens_found);
/*LOG(INFO) << "seq_id: " << seq_id << ", total_typos: " << (255 - ((match_score >> 8) & 0xFF))
<< ", weighted typos: " << std::max<uint64_t>((255 - ((match_score >> 8) & 0xFF)), 1) * priority
<< ", total dist: " << (((match_score & 0xFF)))
<< ", weighted dist: " << std::max<uint64_t>((100 - (match_score & 0xFF)), 1) * priority;*/
/*LOG(INFO) << "seq_id: " << seq_id << ", tokens_found: " << tokens_found
<< ", typo_score: " << typo_score
<< ", proximity_score: " << proximity_score
<< ", exact_match_score: " << exact_match_score
<< ", unique_tokens_found: " << unique_tokens_found
<< ", multi_field_match_score: " << multi_field_match_score;*/
continue;
}
@@ -2445,83 +2436,31 @@ void Index::aggregate_and_score_fields(const std::vector<query_tokens_t>& field_
}
if(words_present != 0) {
uint64_t match_score = Match::get_match_score(words_present, 0, 0);
int64_t multi_field_match_score = (int64_t(words_present) << 32) |
(int64_t(0) << 24) |
(int64_t(0) << 16) |
(int64_t(0) << 8) |
(int64_t(0) << 0);
uint64_t tokens_found = ((match_score >> 24) & 0xFF);
uint64_t field_typos = 255 - ((match_score >> 16) & 0xFF);
total_distances += ((100 - ((match_score >> 8) & 0xFF)) + 1) * priority;
total_typos += (field_typos + 1) * priority;
if(field_typos == 0 && tokens_found == field_query_tokens[i].q_include_tokens.size()) {
exact_match_fields += weight;
// not possible to calculate verbatim_match_fields accurately here, so we won't
if(multi_field_match_score > max_field_match_score) {
max_field_match_score = multi_field_match_score;
max_field_match_index = i;
}
auto weighted_tokens_match = (tokens_found * weight);
if(weighted_tokens_match > max_weighted_tokens_match) {
max_weighted_tokens_match = weighted_tokens_match;
}
if(field_typos < min_typos) {
min_typos = field_typos;
}
total_token_matches += (weight * tokens_found);
//LOG(INFO) << "seq_id: " << seq_id << ", total_typos: " << ((match_score >> 8) & 0xFF);
}
}
// num tokens present across fields including those containing typos
int64_t uniq_tokens_found = int64_t(__builtin_popcount(token_bits)) - 1;
// verbtaim match should not consider dropped-token cases
if(uniq_tokens_found != field_query_tokens[0].q_include_tokens.size()) {
// also check for synonyms
bool found_verbatim_syn = false;
for(const auto& synonym: field_query_tokens[0].q_synonyms) {
if(uniq_tokens_found == synonym.size()) {
found_verbatim_syn = true;
break;
}
}
if(!found_verbatim_syn) {
verbatim_match_fields = 0;
}
}
// protect most significant byte from overflow, since topster uses int64_t
verbatim_match_fields = std::min<uint64_t>(INT8_MAX, verbatim_match_fields);
exact_match_fields += verbatim_match_fields;
exact_match_fields = std::min<uint64_t>(255, exact_match_fields);
max_weighted_tokens_match = std::min<uint64_t>(255, max_weighted_tokens_match);
total_typos = std::min<uint64_t>(255, total_typos);
total_distances = std::min<uint64_t>(100, total_distances);
uint64_t aggregated_score = (
(exact_match_fields << 48) | // number of fields that contain *all tokens* in the query
(max_weighted_tokens_match << 40) | // weighted max number of tokens matched in a field
(uniq_tokens_found << 32) | // number of unique tokens found across fields including typos
((255 - min_typos) << 24) | // minimum typo cost across all fields
(total_token_matches << 16) | // total matches across fields including typos
((255 - total_typos) << 8) | // total typos across fields (weighted)
((100 - total_distances) << 0) // total distances across fields (weighted)
);
uint32_t num_tokens_found = __builtin_popcount(token_bits);
uint64_t aggregated_score = (int64_t(max_field_match_score) << 16) |
(int64_t(num_tokens_found) << 8) |
(int64_t(the_fields[max_field_match_index].weight) << 0);
//LOG(INFO) << "seq id: " << seq_id << ", aggregated_score: " << aggregated_score;
/*LOG(INFO) << "seq id: " << seq_id
<< ", verbatim_match_fields: " << verbatim_match_fields
<< ", exact_match_fields: " << exact_match_fields
<< ", max_weighted_tokens_match: " << max_weighted_tokens_match
<< ", uniq_tokens_found: " << uniq_tokens_found
<< ", min typo score: " << (255 - min_typos)
<< ", total_token_matches: " << total_token_matches
<< ", typo score: " << (255 - total_typos)
<< ", distance score: " << (100 - total_distances)
<< ", aggregated_score: " << aggregated_score << ", token_bits: " << token_bits;*/
LOG(INFO) << "seq id: " << seq_id
<< ", num_tokens_found: " << num_tokens_found
<< ", max_field_match_score: " << max_field_match_score
<< ", matched field weight: " << the_fields[max_field_match_index].weight
<< ", aggregated_score: " << aggregated_score;
kvs[0]->scores[kvs[0]->match_score_index] = aggregated_score;
topster->add(kvs[0]);
@@ -2803,7 +2742,7 @@ void Index::do_infix_search(const std::vector<sort_by>& sort_fields_std,
std::array<spp::sparse_hash_map<uint32_t, int64_t>*, 3> field_values;
std::vector<size_t> geopoint_indices;
populate_sort_mapping(sort_order, geopoint_indices, sort_fields_std, field_values);
uint32_t token_bits = 255;
uint32_t token_bits = 0;
std::sort(infix_ids.begin(), infix_ids.end());
infix_ids.erase(std::unique( infix_ids.begin(), infix_ids.end() ), infix_ids.end());
@@ -2912,7 +2851,7 @@ void Index::compute_facet_infos(const std::vector<facet>& facets, facet_query_t&
if(all_result_ids_len == 0) {
return;
}
for(size_t findex=0; findex < facets.size(); findex++) {
const auto& a_facet = facets[findex];
@@ -3086,7 +3025,7 @@ void Index::search_wildcard(const std::vector<filter>& filters,
std::vector<size_t> geopoint_indices;
populate_sort_mapping(sort_order, geopoint_indices, sort_fields_std, field_values);
uint32_t token_bits = 255;
uint32_t token_bits = 0;
const bool check_for_circuit_break = (filter_ids_length > 1000000);
//auto beginF = std::chrono::high_resolution_clock::now();
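For intuition on why several test expectations in the files below flip, here is an illustrative comparison under the new packing; the numbers are hypothetical and not taken from the tests:

#include <cassert>
#include <cstdint>

int main() {
    // Doc A: its best field matches both query tokens with one typo
    // (the typo byte stores 255 minus the typo count, so 254 here).
    // Doc B: no single field matches more than one token, but with zero typos.
    const int64_t doc_a = (int64_t(2) << 32) | (int64_t(254) << 24);
    const int64_t doc_b = (int64_t(1) << 32) | (int64_t(255) << 24);

    // The unique-token count sits in the most significant packed byte, so A
    // outranks B even though B has fewer typos; the old aggregate could have
    // favored B by summing signals across fields.
    assert(doc_a > doc_b);
    return 0;
}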

View File

@@ -300,7 +300,7 @@ TEST_F(CollectionGroupingTest, GroupingWithMultiFieldRelevance) {
ASSERT_STREQ("pop", results["grouped_hits"][0]["group_key"][0].get<std::string>().c_str());
ASSERT_EQ(2, results["grouped_hits"][0]["hits"].size());
ASSERT_STREQ("1", results["grouped_hits"][0]["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("2", results["grouped_hits"][0]["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("4", results["grouped_hits"][0]["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("rock", results["grouped_hits"][1]["group_key"][0].get<std::string>().c_str());
ASSERT_EQ(2, results["grouped_hits"][1]["hits"].size());
@@ -309,8 +309,8 @@ TEST_F(CollectionGroupingTest, GroupingWithMultiFieldRelevance) {
ASSERT_STREQ("country", results["grouped_hits"][2]["group_key"][0].get<std::string>().c_str());
ASSERT_EQ(2, results["grouped_hits"][2]["hits"].size());
ASSERT_STREQ("8", results["grouped_hits"][2]["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("3", results["grouped_hits"][2]["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("3", results["grouped_hits"][2]["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("8", results["grouped_hits"][2]["hits"][1]["document"]["id"].get<std::string>().c_str());
collectionManager.drop_collection("coll1");
}

View File

@@ -849,8 +849,8 @@ TEST_F(CollectionOverrideTest, DynamicFilteringExactMatchBasics) {
{}, sort_fields, {2, 2, 2}, 10).get();
ASSERT_EQ(3, results["hits"].size());
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
ASSERT_EQ("2", results["hits"][2]["document"]["id"].get<std::string>());
// with override, results will be different

View File

@@ -1263,7 +1263,7 @@ TEST_F(CollectionSortingTest, TextMatchBucketRanking) {
nlohmann::json doc2;
doc2["id"] = "1";
doc2["title"] = "Mark Spencer";
doc2["title"] = "Marks Spencer";
doc2["description"] = "Sales Expert";
doc2["points"] = 200;

View File

@@ -216,15 +216,15 @@ TEST_F(CollectionSpecificTest, OrderMultiFieldFuzzyMatch) {
nlohmann::json doc1;
doc1["id"] = "0";
doc1["title"] = "Moto Insta Share";
doc1["title"] = "Moto Insta Charge";
doc1["description"] = "Share information with this device.";
doc1["points"] = 100;
doc1["points"] = 50;
nlohmann::json doc2;
doc2["id"] = "1";
doc2["title"] = "Portable USB Store";
doc2["description"] = "Use it to charge your phone.";
doc2["points"] = 50;
doc2["points"] = 100;
ASSERT_TRUE(coll1->add(doc1.dump()).ok());
ASSERT_TRUE(coll1->add(doc2.dump()).ok());
@@ -314,6 +314,8 @@ TEST_F(CollectionSpecificTest, MultiFieldArrayRepeatingTokens) {
auto results = coll1->search("rv345 cisco 18", {"title", "description", "attrs"}, "", {}, {}, {1}, 10,
1, FREQUENCY, {true, true, true}).get();
LOG(INFO) << results;
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
@@ -703,15 +705,18 @@ TEST_F(CollectionSpecificTest, HighlightSecondaryFieldWithPrefixMatch) {
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
"<mark>", "</mark>", {1, 1}).get();
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ(2, results["hits"].size());
ASSERT_EQ(2, results["hits"][0]["highlights"].size());
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
ASSERT_EQ(2, results["hits"][1]["highlights"].size());
ASSERT_EQ("<mark>Function</mark>s and Equations",
results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
results["hits"][1]["highlights"][0]["snippet"].get<std::string>());
ASSERT_EQ("Use a <mark>function</mark> to solve an equation.",
results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
results["hits"][1]["highlights"][1]["snippet"].get<std::string>());
collectionManager.drop_collection("coll1");
}
@@ -931,8 +936,8 @@ TEST_F(CollectionSpecificTest, DroppedTokensShouldNotBeDeemedAsVerbatimMatch) {
"<mark>", "</mark>").get();
ASSERT_EQ(2, results["hits"].size());
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
results = coll1->search("john vegatable farmer", {"name", "description"},
"", {}, {}, {1, 1}, 10,
@@ -942,8 +947,8 @@ TEST_F(CollectionSpecificTest, DroppedTokensShouldNotBeDeemedAsVerbatimMatch) {
"<mark>", "</mark>").get();
ASSERT_EQ(2, results["hits"].size());
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
collectionManager.drop_collection("coll1");
}
@@ -1398,43 +1403,6 @@ TEST_F(CollectionSpecificTest, ZeroWeightedField) {
collectionManager.drop_collection("coll1");
}
TEST_F(CollectionSpecificTest, ZeroWeightedFieldCannotPrioritizeExactMatch) {
std::vector<field> fields = {field("name", field_types::STRING, false),
field("category", field_types::STRING, false),
field("points", field_types::INT32, false),};
Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
nlohmann::json doc1;
doc1["id"] = "0";
doc1["name"] = "Levis";
doc1["category"] = "mens";
doc1["points"] = 3;
nlohmann::json doc2;
doc2["id"] = "1";
doc2["name"] = "Amazing from Levis";
doc2["category"] = "mens";
doc2["points"] = 5;
ASSERT_TRUE(coll1->add(doc1.dump()).ok());
ASSERT_TRUE(coll1->add(doc2.dump()).ok());
auto results = coll1->search("levis", {"name", "category"},
"", {}, {}, {0, 0}, 10,
1, FREQUENCY, {false, false},
2, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {0, 1},
1000, true).get();
ASSERT_EQ(2, results["hits"].size());
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
collectionManager.drop_collection("coll1");
}
TEST_F(CollectionSpecificTest, ImportDocumentWithRepeatingIDInTheSameBatch) {
std::vector<field> fields = {field("name", field_types::STRING, false),
field("points", field_types::INT32, false),};
@@ -2316,7 +2284,7 @@ TEST_F(CollectionSpecificTest, HandleLargeWeights) {
collectionManager.drop_collection("coll1");
}
TEST_F(CollectionSpecificTest, VerbatimMatchShouldNotOverpowerHigherWeightedField) {
TEST_F(CollectionSpecificTest, VerbatimMatchShouldOverpowerHigherWeightedField) {
std::vector<field> fields = {field("title", field_types::STRING, false),
field("description", field_types::STRING, false),
field("points", field_types::INT32, false),};
@@ -2346,8 +2314,8 @@ TEST_F(CollectionSpecificTest, VerbatimMatchShouldNotOverpowerHigherWeightedFiel
"<mark>", "</mark>", {4, 1}, 1000, true).get();
ASSERT_EQ(2, results["hits"].size());
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
collectionManager.drop_collection("coll1");
}

View File

@@ -3264,8 +3264,8 @@ TEST_F(CollectionTest, MultiFieldRelevance3) {
ASSERT_EQ(2, results["found"].get<size_t>());
ASSERT_EQ(2, results["hits"].size());
ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());
collectionManager.drop_collection("coll1");
}
@@ -3354,8 +3354,8 @@ TEST_F(CollectionTest, MultiFieldRelevance5) {
ASSERT_EQ(3, results["hits"].size());
ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("2", results["hits"][2]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("2", results["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][2]["document"]["id"].get<std::string>().c_str());
results = coll1->search("Canada",
{"company_name","field_a","country"}, "", {}, {}, {2}, 10, 1, FREQUENCY,
@@ -3367,8 +3367,8 @@ TEST_F(CollectionTest, MultiFieldRelevance5) {
ASSERT_EQ(3, results["hits"].size());
ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("2", results["hits"][2]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("2", results["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][2]["document"]["id"].get<std::string>().c_str());
ASSERT_EQ(2, results["hits"][0]["highlights"].size());
ASSERT_EQ("field_a", results["hits"][0]["highlights"][0]["field"].get<std::string>());
@@ -3376,21 +3376,21 @@ TEST_F(CollectionTest, MultiFieldRelevance5) {
ASSERT_EQ("country", results["hits"][0]["highlights"][1]["field"].get<std::string>());
ASSERT_EQ("<mark>Canada</mark>", results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
ASSERT_EQ(2, results["hits"][1]["highlights"].size());
ASSERT_EQ(1, results["hits"][1]["highlights"].size());
ASSERT_EQ("field_a", results["hits"][1]["highlights"][0]["field"].get<std::string>());
ASSERT_EQ("<mark>Canadoo</mark>", results["hits"][1]["highlights"][0]["snippet"].get<std::string>());
ASSERT_EQ("company_name", results["hits"][1]["highlights"][1]["field"].get<std::string>());
ASSERT_EQ("<mark>Canaida</mark> Corp", results["hits"][1]["highlights"][1]["snippet"].get<std::string>());
ASSERT_EQ(1, results["hits"][2]["highlights"].size());
ASSERT_EQ(2, results["hits"][2]["highlights"].size());
ASSERT_EQ("field_a", results["hits"][2]["highlights"][0]["field"].get<std::string>());
ASSERT_EQ("<mark>Canadoo</mark>", results["hits"][2]["highlights"][0]["snippet"].get<std::string>());
ASSERT_EQ("company_name", results["hits"][2]["highlights"][1]["field"].get<std::string>());
ASSERT_EQ("<mark>Canaida</mark> Corp", results["hits"][2]["highlights"][1]["snippet"].get<std::string>());
collectionManager.drop_collection("coll1");
}
TEST_F(CollectionTest, MultiFieldRelevance6) {
// with exact match preference
// with exact match, the number of fields with exact match will not be considered as a ranking signal
Collection *coll1;
std::vector<field> fields = {field("title", field_types::STRING, false),
@@ -3427,8 +3427,8 @@ TEST_F(CollectionTest, MultiFieldRelevance6) {
ASSERT_EQ(2, results["found"].get<size_t>());
ASSERT_EQ(2, results["hits"].size());
ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());
// when exact matches are disabled
results = coll1->search("taylor swift",
@@ -3943,9 +3943,8 @@ TEST_F(CollectionTest, FieldSpecificNumTypos) {
ASSERT_EQ(2, results["found"].get<size_t>());
ASSERT_EQ(2, results["hits"].size());
ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());
results = coll1->search("tayylor",
{"title", "artist"}, "", {}, {}, {0, 1}, 10, 1, FREQUENCY,
@@ -4082,8 +4081,8 @@ TEST_F(CollectionTest, FieldLevelPrefixConfiguration) {
ASSERT_EQ(2, results["found"].get<size_t>());
ASSERT_EQ(2, results["hits"].size());
ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());
collectionManager.drop_collection("coll1");
}