Fix ordering issue on pinned hits.

This commit is contained in:
kishorenc 2020-11-18 06:58:44 +05:30
parent 9eee71307f
commit 254c58dd31
4 changed files with 114 additions and 23 deletions

View File

@ -852,18 +852,20 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
size_t raw_results_index = 0;
// merge raw results and override results
while(override_kv_index < override_result_kvs.size() && raw_results_index < raw_result_kvs.size()) {
size_t result_position = result_group_kvs.size() + 1;
uint64_t override_position = override_result_kvs[override_kv_index][0]->distinct_key;
if(result_position == override_position) {
override_result_kvs[override_kv_index][0]->match_score = 0; // to identify curated result
result_group_kvs.push_back(override_result_kvs[override_kv_index]);
override_kv_index++;
} else {
result_group_kvs.push_back(raw_result_kvs[raw_results_index]);
raw_results_index++;
while(raw_results_index < raw_result_kvs.size()) {
if(override_kv_index < override_result_kvs.size()) {
size_t result_position = result_group_kvs.size() + 1;
uint64_t override_position = override_result_kvs[override_kv_index][0]->distinct_key;
if(result_position == override_position) {
override_result_kvs[override_kv_index][0]->match_score = 0; // to identify curated result
result_group_kvs.push_back(override_result_kvs[override_kv_index]);
override_kv_index++;
continue;
}
}
result_group_kvs.push_back(raw_result_kvs[raw_results_index]);
raw_results_index++;
}
while(override_kv_index < override_result_kvs.size()) {
@ -872,11 +874,6 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
override_kv_index++;
}
while(raw_results_index < raw_result_kvs.size()) {
result_group_kvs.push_back(raw_result_kvs[raw_results_index]);
raw_results_index++;
}
const long start_result_index = (page - 1) * per_page;
const long end_result_index = std::min(max_hits, result_group_kvs.size()) - 1; // could be -1 when max_hits is 0
@ -1064,9 +1061,9 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
// handle query token being larger than actual token (typo correction)
query_token_len = std::min(query_token_len, tokens[i].size());
const std::string & unmarked = tokens[i].substr(query_token_len, std::string::npos);
highlightedss << highlight_start_tag +
tokens[i].substr(0, query_token_len) +
highlight_end_tag + unmarked;
highlightedss << highlight_start_tag <<
tokens[i].substr(0, query_token_len) <<
highlight_end_tag << unmarked;
} else {
highlightedss << tokens[i];
}

View File

@ -1312,8 +1312,8 @@ void Index::collate_included_ids(const std::string & query, const std::string &
uint32_t inner_pos = index_seq_id.first;
uint32_t seq_id = index_seq_id.second;
uint64_t distinct_id = outer_pos; // outer pos is the group distinct key
uint64_t match_score = (64000 - inner_pos); // inner pos within a group is the match score
uint64_t distinct_id = outer_pos; // outer pos is the group distinct key
uint64_t match_score = (64000 - outer_pos - inner_pos); // both outer pos and inner pos inside group
// LOG(INFO) << "seq_id: " << seq_id << " - " << match_score;

View File

@ -723,7 +723,7 @@ TEST_F(CollectionFacetingTest, FacetCountOnSimilarStrings) {
Collection *coll1;
std::vector<field> fields = {field("categories", field_types::STRING_ARRAY, true),
field("points", field_types::INT32, false)};
field("points", field_types::INT32, true)};
std::vector<sort_by> sort_fields = {sort_by("points", "DESC")};
@ -747,10 +747,14 @@ TEST_F(CollectionFacetingTest, FacetCountOnSimilarStrings) {
std::vector<std::string> facets = {"categories"};
nlohmann::json results = coll1->search("india", {"categories"}, "", facets, sort_fields, 0, 10, 1,
nlohmann::json results = coll1->search("*", {"categories"}, "points:[25, 50]", facets, sort_fields, 0, 10, 1,
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10).get();
LOG(INFO) << results;
return;
ASSERT_EQ(2, results["hits"].size());
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());

View File

@ -355,6 +355,96 @@ TEST_F(CollectionOverrideTest, IncludeExcludeHitsQuery) {
ASSERT_STREQ("6", results["hits"][1]["document"]["id"].get<std::string>().c_str());
}
TEST_F(CollectionOverrideTest, PinnedHitsSmallerThanPageSize) {
std::map<size_t, std::vector<std::string>> pinned_hits;
pinned_hits[1] = {"17"};
pinned_hits[4] = {"13"};
pinned_hits[3] = {"11"};
// pinned hits larger than page size: check that pagination works
// without overrides:
// 11, 16, 6, 8, 1, 0, 10, 4, 13, 17
auto results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 8, 1, FREQUENCY,
false, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
"", 10,
pinned_hits, {}).get();
std::vector<size_t> expected_ids_p1 = {17, 16, 11, 13, 6, 8, 1, 0};
ASSERT_EQ(10, results["found"].get<size_t>());
ASSERT_EQ(8, results["hits"].size());
for(size_t i=0; i<8; i++) {
ASSERT_EQ(expected_ids_p1[i], std::stoi(results["hits"][i]["document"]["id"].get<std::string>()));
}
std::vector<size_t> expected_ids_p2 = {10, 4};
results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 8, 2, FREQUENCY,
false, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
"", 10,
pinned_hits, {}).get();
ASSERT_EQ(10, results["found"].get<size_t>());
ASSERT_EQ(2, results["hits"].size());
for(size_t i=0; i<2; i++) {
ASSERT_EQ(expected_ids_p2[i], std::stoi(results["hits"][i]["document"]["id"].get<std::string>()));
}
}
TEST_F(CollectionOverrideTest, PinnedHitsLargerThanPageSize) {
std::map<size_t, std::vector<std::string>> pinned_hits;
pinned_hits[1] = {"6"};
pinned_hits[2] = {"1"};
pinned_hits[3] = {"16"};
pinned_hits[4] = {"11"};
// pinned hits larger than page size: check that pagination works
auto results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 2, 1, FREQUENCY,
false, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
"", 10,
pinned_hits, {}).get();
ASSERT_EQ(10, results["found"].get<size_t>());
ASSERT_EQ(2, results["hits"].size());
ASSERT_STREQ("6", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 2, 2, FREQUENCY,
false, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
"", 10,
pinned_hits, {}).get();
ASSERT_EQ(10, results["found"].get<size_t>());
ASSERT_EQ(2, results["hits"].size());
ASSERT_STREQ("16", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("11", results["hits"][1]["document"]["id"].get<std::string>().c_str());
results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 2, 3, FREQUENCY,
false, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
"", 10,
pinned_hits, {}).get();
ASSERT_EQ(10, results["found"].get<size_t>());
ASSERT_EQ(2, results["hits"].size());
ASSERT_STREQ("8", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());
}
TEST_F(CollectionOverrideTest, PinnedHitsGrouping) {
std::map<size_t, std::vector<std::string>> pinned_hits;
pinned_hits[1] = {"6", "8"};