Mirror of https://github.com/typesense/typesense.git

Tweak relevancy scoring.

This commit is contained in:
parent 0341e693d1
commit 12c443e222

src/index.cpp (163 lines changed)
@@ -1025,7 +1025,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
                      const std::vector<facet_info_t>& facet_infos,
                      const size_t group_limit, const std::vector<std::string>& group_by_fields,
                      const uint32_t* result_ids, size_t results_size) const {

    // assumed that facet fields have already been validated upstream
    for(size_t findex=0; findex < facets.size(); findex++) {
        auto& a_facet = facets[findex];

@@ -1139,7 +1139,7 @@ void Index::search_candidates(const uint8_t & field_id, bool field_is_array,
        std::vector<art_leaf*> actual_query_suggestion(token_candidates_vec.size());
        uint64 qhash;

        uint32_t token_bits = (uint32_t(1) << 31); // top most bit set to guarantee atleast 1 bit set
        uint32_t token_bits = 0;
        uint32_t total_cost = next_suggestion(token_candidates_vec, n, actual_query_suggestion,
                                              query_suggestion, token_bits, qhash);

@@ -1456,7 +1456,7 @@ void Index::do_filtering(uint32_t*& filter_ids, uint32_t& filter_ids_length,

        if(f.is_single_geopoint()) {
            spp::sparse_hash_map<uint32_t, int64_t>* sort_field_index = sort_index.at(f.name);

            for(auto result_id: geo_result_ids) {
                // no need to check for existence of `result_id` because of indexer based pre-filtering above
                int64_t lat_lng = sort_field_index->at(result_id);

@@ -1703,7 +1703,7 @@ void Index::collate_included_ids(const std::vector<std::string>& q_included_toke
        scores[1] = int64_t(1);
        scores[2] = int64_t(1);

        uint32_t token_bits = (uint32_t(1) << 31);
        uint32_t token_bits = 0;
        KV kv(field_id, searched_queries.size(), token_bits, seq_id, distinct_id, 0, scores);
        curated_topster->add(&kv);
    }
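In the hunks above, token_bits previously started from a sentinel value (top bit pre-set, or 255) so that a later __builtin_popcount() call could never return zero; the commit appears to drop the sentinel and start from zero, letting the popcount itself serve as the count of unique query tokens matched. A minimal sketch of that idea, assuming one bit per matched query token (the helper name and values below are illustrative, not from the codebase):

    #include <cstdint>
    #include <iostream>

    // Illustrative helper: each matched query token sets one bit in token_bits.
    static int unique_tokens_matched(uint32_t token_bits) {
        // With no sentinel bit, the popcount is the token count itself,
        // so no "- 1" correction is needed afterwards.
        return __builtin_popcount(token_bits);
    }

    int main() {
        uint32_t token_bits = 0;
        token_bits |= (1u << 0);  // first query token matched
        token_bits |= (1u << 2);  // third query token matched
        std::cout << unique_tokens_matched(token_bits) << "\n";  // prints 2
        return 0;
    }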
@@ -2359,60 +2359,51 @@ void Index::aggregate_and_score_fields(const std::vector<query_tokens_t>& field_
                      << "searched_query: " << searched_queries[kvs[kv_i]->query_index][0];*/
        }

        uint32_t token_bits = (uint32_t(1) << 31); // top most bit set to guarantee atleast 1 bit set
        uint64_t total_typos = 0, total_distances = 0, min_typos = 1000;

        uint64_t verbatim_match_fields = 0;      // field value *exactly* same as query tokens
        uint64_t exact_match_fields = 0;         // number of fields that contains all of query tokens
        uint64_t max_weighted_tokens_match = 0;  // weighted max number of tokens matched in a field
        uint64_t total_token_matches = 0;        // total matches across fields (including fuzzy ones)
        uint32_t token_bits = 0;
        int64_t max_field_match_score = 0;
        size_t max_field_match_index = 0;

        //LOG(INFO) << "Init pop count: " << __builtin_popcount(token_bits);

        for(size_t i = 0; i < num_search_fields; i++) {
            const auto field_id = (uint8_t)(FIELD_LIMIT_NUM - i);
            const size_t priority = the_fields[i].priority;
            const size_t weight = the_fields[i].weight;

            //LOG(INFO) << "--- field index: " << i << ", priority: " << priority;

            if(existing_field_kvs.count(field_id) != 0) {
                // for existing field, we will simply sum field-wise weighted scores
                token_bits |= existing_field_kvs[field_id]->token_bits;
                //LOG(INFO) << "existing_field_kvs.count pop count: " << __builtin_popcount(token_bits);

                int64_t match_score = existing_field_kvs[field_id]->scores[existing_field_kvs[field_id]->match_score_index];
                token_bits |= existing_field_kvs[field_id]->token_bits;

                // we will reassemble match score to use unique tokens for cross-field matches

                uint64_t tokens_found = ((match_score >> 24) & 0xFF);
                uint64_t field_typos = 255 - ((match_score >> 16) & 0xFF);
                total_typos += (field_typos + 1) * priority;
                total_distances += ((100 - ((match_score >> 8) & 0xFF)) + 1) * priority;

                uint64_t typo_score = ((match_score >> 16) & 0xFF);
                uint64_t proximity_score = ((match_score >> 8) & 0xFF);
                int64_t exact_match_score = match_score & 0xFF;
                verbatim_match_fields += (weight * exact_match_score);
                uint32_t unique_tokens_found = __builtin_popcount(existing_field_kvs[field_id]->token_bits);

                uint64_t unique_tokens_found =
                        int64_t(__builtin_popcount(existing_field_kvs[field_id]->token_bits)) - 1;

                if(field_typos == 0 && unique_tokens_found == field_query_tokens[i].q_include_tokens.size()) {
                    exact_match_fields += weight;
                // exclude dropped-token cases
                if(unique_tokens_found != field_query_tokens[0].q_include_tokens.size()) {
                    exact_match_score = 0;
                }

                auto weighted_tokens_match = (tokens_found * weight);
                if(weighted_tokens_match > max_weighted_tokens_match) {
                    max_weighted_tokens_match = weighted_tokens_match;
                int64_t multi_field_match_score = (int64_t(unique_tokens_found) << 32) |
                                                  (int64_t(typo_score) << 24) |
                                                  (int64_t(proximity_score) << 16) |
                                                  (int64_t(exact_match_score) << 8) |
                                                  (int64_t(0) << 0);

                if(multi_field_match_score > max_field_match_score) {
                    max_field_match_score = multi_field_match_score;
                    max_field_match_index = i;
                }

                if(field_typos < min_typos) {
                    min_typos = field_typos;
                }

                total_token_matches += (weight * tokens_found);

                /*LOG(INFO) << "seq_id: " << seq_id << ", total_typos: " << (255 - ((match_score >> 8) & 0xFF))
                          << ", weighted typos: " << std::max<uint64_t>((255 - ((match_score >> 8) & 0xFF)), 1) * priority
                          << ", total dist: " << (((match_score & 0xFF)))
                          << ", weighted dist: " << std::max<uint64_t>((100 - (match_score & 0xFF)), 1) * priority;*/
                /*LOG(INFO) << "seq_id: " << seq_id << ", tokens_found: " << tokens_found
                          << ", typo_score: " << typo_score
                          << ", proximity_score: " << proximity_score
                          << ", exact_match_score: " << exact_match_score
                          << ", unique_tokens_found: " << unique_tokens_found
                          << ", multi_field_match_score: " << multi_field_match_score;*/
                continue;
            }
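In the hunk above, the per-field signals that previously fed separate counters (total_typos, total_distances, exact/verbatim match fields) are re-packed into a single comparable value, multi_field_match_score: unique tokens found in bits 32 and up, typo score in bits 24-31, proximity in bits 16-23 and the exact-match flag in bits 8-15, so a single integer comparison picks the best-matching field. A small sketch of that packing (the helper name and sample values are illustrative, not from the codebase):

    #include <cassert>
    #include <cstdint>

    // Illustrative packing that mirrors the shifts used in the diff above.
    static int64_t pack_field_match_score(uint64_t unique_tokens_found, uint64_t typo_score,
                                          uint64_t proximity_score, int64_t exact_match_score) {
        return (int64_t(unique_tokens_found) << 32) |
               (int64_t(typo_score) << 24) |
               (int64_t(proximity_score) << 16) |
               (int64_t(exact_match_score) << 8);
    }

    int main() {
        // A field matching 2 unique tokens outranks a field matching only 1 token,
        // regardless of the lower typo/proximity/exact-match bytes.
        assert(pack_field_match_score(2, 250, 90, 0) > pack_field_match_score(1, 255, 100, 1));
        return 0;
    }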
@@ -2445,83 +2436,31 @@ void Index::aggregate_and_score_fields(const std::vector<query_tokens_t>& field_
                }

                if(words_present != 0) {
                    uint64_t match_score = Match::get_match_score(words_present, 0, 0);
                    int64_t multi_field_match_score = (int64_t(words_present) << 32) |
                                                      (int64_t(0) << 24) |
                                                      (int64_t(0) << 16) |
                                                      (int64_t(0) << 8) |
                                                      (int64_t(0) << 0);

                    uint64_t tokens_found = ((match_score >> 24) & 0xFF);
                    uint64_t field_typos = 255 - ((match_score >> 16) & 0xFF);
                    total_distances += ((100 - ((match_score >> 8) & 0xFF)) + 1) * priority;
                    total_typos += (field_typos + 1) * priority;

                    if(field_typos == 0 && tokens_found == field_query_tokens[i].q_include_tokens.size()) {
                        exact_match_fields += weight;
                    // not possible to calculate verbatim_match_fields accurately here, so we won't
                    if(multi_field_match_score > max_field_match_score) {
                        max_field_match_score = multi_field_match_score;
                        max_field_match_index = i;
                    }

                    auto weighted_tokens_match = (tokens_found * weight);

                    if(weighted_tokens_match > max_weighted_tokens_match) {
                        max_weighted_tokens_match = weighted_tokens_match;
                    }

                    if(field_typos < min_typos) {
                        min_typos = field_typos;
                    }

                    total_token_matches += (weight * tokens_found);
                    //LOG(INFO) << "seq_id: " << seq_id << ", total_typos: " << ((match_score >> 8) & 0xFF);
                }
            }

            // num tokens present across fields including those containing typos
            int64_t uniq_tokens_found = int64_t(__builtin_popcount(token_bits)) - 1;

            // verbtaim match should not consider dropped-token cases
            if(uniq_tokens_found != field_query_tokens[0].q_include_tokens.size()) {
                // also check for synonyms
                bool found_verbatim_syn = false;
                for(const auto& synonym: field_query_tokens[0].q_synonyms) {
                    if(uniq_tokens_found == synonym.size()) {
                        found_verbatim_syn = true;
                        break;
                    }
                }

                if(!found_verbatim_syn) {
                    verbatim_match_fields = 0;
                }
            }

            // protect most significant byte from overflow, since topster uses int64_t
            verbatim_match_fields = std::min<uint64_t>(INT8_MAX, verbatim_match_fields);

            exact_match_fields += verbatim_match_fields;
            exact_match_fields = std::min<uint64_t>(255, exact_match_fields);
            max_weighted_tokens_match = std::min<uint64_t>(255, max_weighted_tokens_match);
            total_typos = std::min<uint64_t>(255, total_typos);
            total_distances = std::min<uint64_t>(100, total_distances);

            uint64_t aggregated_score = (
                (exact_match_fields << 48) |         // number of fields that contain *all tokens* in the query
                (max_weighted_tokens_match << 40) |  // weighted max number of tokens matched in a field
                (uniq_tokens_found << 32) |          // number of unique tokens found across fields including typos
                ((255 - min_typos) << 24) |          // minimum typo cost across all fields
                (total_token_matches << 16) |        // total matches across fields including typos
                ((255 - total_typos) << 8) |         // total typos across fields (weighted)
                ((100 - total_distances) << 0)       // total distances across fields (weighted)
            );
            uint32_t num_tokens_found = __builtin_popcount(token_bits);
            uint64_t aggregated_score = (int64_t(max_field_match_score) << 16) |
                                        (int64_t(num_tokens_found) << 8) |
                                        (int64_t(the_fields[max_field_match_index].weight) << 0);

            //LOG(INFO) << "seq id: " << seq_id << ", aggregated_score: " << aggregated_score;

            /*LOG(INFO) << "seq id: " << seq_id
                      << ", verbatim_match_fields: " << verbatim_match_fields
                      << ", exact_match_fields: " << exact_match_fields
                      << ", max_weighted_tokens_match: " << max_weighted_tokens_match
                      << ", uniq_tokens_found: " << uniq_tokens_found
                      << ", min typo score: " << (255 - min_typos)
                      << ", total_token_matches: " << total_token_matches
                      << ", typo score: " << (255 - total_typos)
                      << ", distance score: " << (100 - total_distances)
                      << ", aggregated_score: " << aggregated_score << ", token_bits: " << token_bits;*/
            LOG(INFO) << "seq id: " << seq_id
                      << ", num_tokens_found: " << num_tokens_found
                      << ", max_field_match_score: " << max_field_match_score
                      << ", matched field weight: " << the_fields[max_field_match_index].weight
                      << ", aggregated_score: " << aggregated_score;

            kvs[0]->scores[kvs[0]->match_score_index] = aggregated_score;
            topster->add(kvs[0]);
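The old aggregated score above packed seven cross-field counters into one value; the replacement appears to keep just three signals: the best single-field match score, the number of unique query tokens found anywhere, and the weight of the best-matching field. A hedged sketch of how the new packing orders two hypothetical documents (the function name and values are made up for illustration):

    #include <cstdint>
    #include <iostream>

    // Illustrative re-statement of the new packing shown in the diff above.
    static uint64_t aggregate_score(int64_t max_field_match_score, uint32_t num_tokens_found,
                                    uint64_t best_field_weight) {
        return (uint64_t(max_field_match_score) << 16) |
               (uint64_t(num_tokens_found) << 8) |
               (best_field_weight << 0);
    }

    int main() {
        // Doc A: stronger match in its best field; Doc B: weaker best-field match but a heavier field weight.
        uint64_t doc_a = aggregate_score(/*max_field_match_score=*/5000, /*num_tokens_found=*/2, /*weight=*/1);
        uint64_t doc_b = aggregate_score(/*max_field_match_score=*/4000, /*num_tokens_found=*/2, /*weight=*/4);
        std::cout << (doc_a > doc_b ? "A ranks higher\n" : "B ranks higher\n");  // prints "A ranks higher"
        return 0;
    }

Because the field match score occupies the high bits, a stronger in-field match wins even against a higher-weighted field, which lines up with the renamed VerbatimMatchShouldOverpowerHigherWeightedField test further down.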
@@ -2803,7 +2742,7 @@ void Index::do_infix_search(const std::vector<sort_by>& sort_fields_std,
    std::array<spp::sparse_hash_map<uint32_t, int64_t>*, 3> field_values;
    std::vector<size_t> geopoint_indices;
    populate_sort_mapping(sort_order, geopoint_indices, sort_fields_std, field_values);
    uint32_t token_bits = 255;
    uint32_t token_bits = 0;

    std::sort(infix_ids.begin(), infix_ids.end());
    infix_ids.erase(std::unique( infix_ids.begin(), infix_ids.end() ), infix_ids.end());

@@ -2912,7 +2851,7 @@ void Index::compute_facet_infos(const std::vector<facet>& facets, facet_query_t&
    if(all_result_ids_len == 0) {
        return;
    }

    for(size_t findex=0; findex < facets.size(); findex++) {
        const auto& a_facet = facets[findex];

@@ -3086,7 +3025,7 @@ void Index::search_wildcard(const std::vector<filter>& filters,
    std::vector<size_t> geopoint_indices;
    populate_sort_mapping(sort_order, geopoint_indices, sort_fields_std, field_values);

    uint32_t token_bits = 255;
    uint32_t token_bits = 0;
    const bool check_for_circuit_break = (filter_ids_length > 1000000);

    //auto beginF = std::chrono::high_resolution_clock::now();
@@ -300,7 +300,7 @@ TEST_F(CollectionGroupingTest, GroupingWithMultiFieldRelevance) {
    ASSERT_STREQ("pop", results["grouped_hits"][0]["group_key"][0].get<std::string>().c_str());
    ASSERT_EQ(2, results["grouped_hits"][0]["hits"].size());
    ASSERT_STREQ("1", results["grouped_hits"][0]["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("2", results["grouped_hits"][0]["hits"][1]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("4", results["grouped_hits"][0]["hits"][1]["document"]["id"].get<std::string>().c_str());

    ASSERT_STREQ("rock", results["grouped_hits"][1]["group_key"][0].get<std::string>().c_str());
    ASSERT_EQ(2, results["grouped_hits"][1]["hits"].size());

@@ -309,8 +309,8 @@ TEST_F(CollectionGroupingTest, GroupingWithMultiFieldRelevance) {

    ASSERT_STREQ("country", results["grouped_hits"][2]["group_key"][0].get<std::string>().c_str());
    ASSERT_EQ(2, results["grouped_hits"][2]["hits"].size());
    ASSERT_STREQ("8", results["grouped_hits"][2]["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("3", results["grouped_hits"][2]["hits"][1]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("3", results["grouped_hits"][2]["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("8", results["grouped_hits"][2]["hits"][1]["document"]["id"].get<std::string>().c_str());

    collectionManager.drop_collection("coll1");
}
@@ -849,8 +849,8 @@ TEST_F(CollectionOverrideTest, DynamicFilteringExactMatchBasics) {
                                 {}, sort_fields, {2, 2, 2}, 10).get();

    ASSERT_EQ(3, results["hits"].size());
    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
    ASSERT_EQ("2", results["hits"][2]["document"]["id"].get<std::string>());

    // with override, results will be different
@@ -1263,7 +1263,7 @@ TEST_F(CollectionSortingTest, TextMatchBucketRanking) {

    nlohmann::json doc2;
    doc2["id"] = "1";
    doc2["title"] = "Mark Spencer";
    doc2["title"] = "Marks Spencer";
    doc2["description"] = "Sales Expert";
    doc2["points"] = 200;
@@ -216,15 +216,15 @@ TEST_F(CollectionSpecificTest, OrderMultiFieldFuzzyMatch) {

    nlohmann::json doc1;
    doc1["id"] = "0";
    doc1["title"] = "Moto Insta Share";
    doc1["title"] = "Moto Insta Charge";
    doc1["description"] = "Share information with this device.";
    doc1["points"] = 100;
    doc1["points"] = 50;

    nlohmann::json doc2;
    doc2["id"] = "1";
    doc2["title"] = "Portable USB Store";
    doc2["description"] = "Use it to charge your phone.";
    doc2["points"] = 50;
    doc2["points"] = 100;

    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
@@ -314,6 +314,8 @@ TEST_F(CollectionSpecificTest, MultiFieldArrayRepeatingTokens) {
    auto results = coll1->search("rv345 cisco 18", {"title", "description", "attrs"}, "", {}, {}, {1}, 10,
                                 1, FREQUENCY, {true, true, true}).get();

    LOG(INFO) << results;

    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());

@@ -703,15 +705,18 @@ TEST_F(CollectionSpecificTest, HighlightSecondaryFieldWithPrefixMatch) {
                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
                                 "<mark>", "</mark>", {1, 1}).get();

    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ(2, results["hits"].size());

    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());

    ASSERT_EQ(2, results["hits"][1]["highlights"].size());

    ASSERT_EQ("<mark>Function</mark>s and Equations",
              results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
              results["hits"][1]["highlights"][0]["snippet"].get<std::string>());

    ASSERT_EQ("Use a <mark>function</mark> to solve an equation.",
              results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
              results["hits"][1]["highlights"][1]["snippet"].get<std::string>());

    collectionManager.drop_collection("coll1");
}
@@ -931,8 +936,8 @@ TEST_F(CollectionSpecificTest, DroppedTokensShouldNotBeDeemedAsVerbatimMatch) {
                                 "<mark>", "</mark>").get();

    ASSERT_EQ(2, results["hits"].size());
    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());

    results = coll1->search("john vegatable farmer", {"name", "description"},
                            "", {}, {}, {1, 1}, 10,

@@ -942,8 +947,8 @@ TEST_F(CollectionSpecificTest, DroppedTokensShouldNotBeDeemedAsVerbatimMatch) {
                            "<mark>", "</mark>").get();

    ASSERT_EQ(2, results["hits"].size());
    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());

    collectionManager.drop_collection("coll1");
}
@@ -1398,43 +1403,6 @@ TEST_F(CollectionSpecificTest, ZeroWeightedField) {
    collectionManager.drop_collection("coll1");
}

TEST_F(CollectionSpecificTest, ZeroWeightedFieldCannotPrioritizeExactMatch) {
    std::vector<field> fields = {field("name", field_types::STRING, false),
                                 field("category", field_types::STRING, false),
                                 field("points", field_types::INT32, false),};

    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();

    nlohmann::json doc1;
    doc1["id"] = "0";
    doc1["name"] = "Levis";
    doc1["category"] = "mens";
    doc1["points"] = 3;

    nlohmann::json doc2;
    doc2["id"] = "1";
    doc2["name"] = "Amazing from Levis";
    doc2["category"] = "mens";
    doc2["points"] = 5;

    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
    ASSERT_TRUE(coll1->add(doc2.dump()).ok());

    auto results = coll1->search("levis", {"name", "category"},
                                 "", {}, {}, {0, 0}, 10,
                                 1, FREQUENCY, {false, false},
                                 2, spp::sparse_hash_set<std::string>(),
                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
                                 "<mark>", "</mark>", {0, 1},
                                 1000, true).get();

    ASSERT_EQ(2, results["hits"].size());
    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());

    collectionManager.drop_collection("coll1");
}

TEST_F(CollectionSpecificTest, ImportDocumentWithRepeatingIDInTheSameBatch) {
    std::vector<field> fields = {field("name", field_types::STRING, false),
                                 field("points", field_types::INT32, false),};
@@ -2316,7 +2284,7 @@ TEST_F(CollectionSpecificTest, HandleLargeWeights) {
    collectionManager.drop_collection("coll1");
}

TEST_F(CollectionSpecificTest, VerbatimMatchShouldNotOverpowerHigherWeightedField) {
TEST_F(CollectionSpecificTest, VerbatimMatchShouldOverpowerHigherWeightedField) {
    std::vector<field> fields = {field("title", field_types::STRING, false),
                                 field("description", field_types::STRING, false),
                                 field("points", field_types::INT32, false),};

@@ -2346,8 +2314,8 @@ TEST_F(CollectionSpecificTest, VerbatimMatchShouldNotOverpowerHigherWeightedFiel
                                 "<mark>", "</mark>", {4, 1}, 1000, true).get();

    ASSERT_EQ(2, results["hits"].size());
    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());

    collectionManager.drop_collection("coll1");
}
@@ -3264,8 +3264,8 @@ TEST_F(CollectionTest, MultiFieldRelevance3) {
    ASSERT_EQ(2, results["found"].get<size_t>());
    ASSERT_EQ(2, results["hits"].size());

    ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());

    collectionManager.drop_collection("coll1");
}

@@ -3354,8 +3354,8 @@ TEST_F(CollectionTest, MultiFieldRelevance5) {
    ASSERT_EQ(3, results["hits"].size());

    ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("2", results["hits"][2]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("2", results["hits"][1]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("1", results["hits"][2]["document"]["id"].get<std::string>().c_str());

    results = coll1->search("Canada",
                            {"company_name","field_a","country"}, "", {}, {}, {2}, 10, 1, FREQUENCY,

@@ -3367,8 +3367,8 @@ TEST_F(CollectionTest, MultiFieldRelevance5) {
    ASSERT_EQ(3, results["hits"].size());

    ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("2", results["hits"][2]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("2", results["hits"][1]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("1", results["hits"][2]["document"]["id"].get<std::string>().c_str());

    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
    ASSERT_EQ("field_a", results["hits"][0]["highlights"][0]["field"].get<std::string>());

@@ -3376,21 +3376,21 @@ TEST_F(CollectionTest, MultiFieldRelevance5) {
    ASSERT_EQ("country", results["hits"][0]["highlights"][1]["field"].get<std::string>());
    ASSERT_EQ("<mark>Canada</mark>", results["hits"][0]["highlights"][1]["snippet"].get<std::string>());

    ASSERT_EQ(2, results["hits"][1]["highlights"].size());
    ASSERT_EQ(1, results["hits"][1]["highlights"].size());
    ASSERT_EQ("field_a", results["hits"][1]["highlights"][0]["field"].get<std::string>());
    ASSERT_EQ("<mark>Canadoo</mark>", results["hits"][1]["highlights"][0]["snippet"].get<std::string>());
    ASSERT_EQ("company_name", results["hits"][1]["highlights"][1]["field"].get<std::string>());
    ASSERT_EQ("<mark>Canaida</mark> Corp", results["hits"][1]["highlights"][1]["snippet"].get<std::string>());

    ASSERT_EQ(1, results["hits"][2]["highlights"].size());
    ASSERT_EQ(2, results["hits"][2]["highlights"].size());
    ASSERT_EQ("field_a", results["hits"][2]["highlights"][0]["field"].get<std::string>());
    ASSERT_EQ("<mark>Canadoo</mark>", results["hits"][2]["highlights"][0]["snippet"].get<std::string>());
    ASSERT_EQ("company_name", results["hits"][2]["highlights"][1]["field"].get<std::string>());
    ASSERT_EQ("<mark>Canaida</mark> Corp", results["hits"][2]["highlights"][1]["snippet"].get<std::string>());

    collectionManager.drop_collection("coll1");
}

TEST_F(CollectionTest, MultiFieldRelevance6) {
    // with exact match preference
    // with exact match, the number of fields with exact match will not be considered as a ranking signal
    Collection *coll1;

    std::vector<field> fields = {field("title", field_types::STRING, false),
@@ -3427,8 +3427,8 @@ TEST_F(CollectionTest, MultiFieldRelevance6) {
    ASSERT_EQ(2, results["found"].get<size_t>());
    ASSERT_EQ(2, results["hits"].size());

    ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());

    // when exact matches are disabled
    results = coll1->search("taylor swift",

@@ -3943,9 +3943,8 @@ TEST_F(CollectionTest, FieldSpecificNumTypos) {
    ASSERT_EQ(2, results["found"].get<size_t>());
    ASSERT_EQ(2, results["hits"].size());

    ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());

    ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());

    results = coll1->search("tayylor",
                            {"title", "artist"}, "", {}, {}, {0, 1}, 10, 1, FREQUENCY,

@@ -4082,8 +4081,8 @@ TEST_F(CollectionTest, FieldLevelPrefixConfiguration) {
    ASSERT_EQ(2, results["found"].get<size_t>());
    ASSERT_EQ(2, results["hits"].size());

    ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());

    collectionManager.drop_collection("coll1");
}