mirror of
https://github.com/typesense/typesense.git
synced 2025-05-19 13:12:22 +08:00
Fix ordering issue on pinned hits.
This commit is contained in:
parent
9eee71307f
commit
254c58dd31
@ -852,18 +852,20 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
|
||||
size_t raw_results_index = 0;
|
||||
|
||||
// merge raw results and override results
|
||||
while(override_kv_index < override_result_kvs.size() && raw_results_index < raw_result_kvs.size()) {
|
||||
size_t result_position = result_group_kvs.size() + 1;
|
||||
uint64_t override_position = override_result_kvs[override_kv_index][0]->distinct_key;
|
||||
|
||||
if(result_position == override_position) {
|
||||
override_result_kvs[override_kv_index][0]->match_score = 0; // to identify curated result
|
||||
result_group_kvs.push_back(override_result_kvs[override_kv_index]);
|
||||
override_kv_index++;
|
||||
} else {
|
||||
result_group_kvs.push_back(raw_result_kvs[raw_results_index]);
|
||||
raw_results_index++;
|
||||
while(raw_results_index < raw_result_kvs.size()) {
|
||||
if(override_kv_index < override_result_kvs.size()) {
|
||||
size_t result_position = result_group_kvs.size() + 1;
|
||||
uint64_t override_position = override_result_kvs[override_kv_index][0]->distinct_key;
|
||||
if(result_position == override_position) {
|
||||
override_result_kvs[override_kv_index][0]->match_score = 0; // to identify curated result
|
||||
result_group_kvs.push_back(override_result_kvs[override_kv_index]);
|
||||
override_kv_index++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
result_group_kvs.push_back(raw_result_kvs[raw_results_index]);
|
||||
raw_results_index++;
|
||||
}
|
||||
|
||||
while(override_kv_index < override_result_kvs.size()) {
|
||||
@ -872,11 +874,6 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
|
||||
override_kv_index++;
|
||||
}
|
||||
|
||||
while(raw_results_index < raw_result_kvs.size()) {
|
||||
result_group_kvs.push_back(raw_result_kvs[raw_results_index]);
|
||||
raw_results_index++;
|
||||
}
|
||||
|
||||
const long start_result_index = (page - 1) * per_page;
|
||||
const long end_result_index = std::min(max_hits, result_group_kvs.size()) - 1; // could be -1 when max_hits is 0
|
||||
|
||||
@ -1064,9 +1061,9 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
|
||||
// handle query token being larger than actual token (typo correction)
|
||||
query_token_len = std::min(query_token_len, tokens[i].size());
|
||||
const std::string & unmarked = tokens[i].substr(query_token_len, std::string::npos);
|
||||
highlightedss << highlight_start_tag +
|
||||
tokens[i].substr(0, query_token_len) +
|
||||
highlight_end_tag + unmarked;
|
||||
highlightedss << highlight_start_tag <<
|
||||
tokens[i].substr(0, query_token_len) <<
|
||||
highlight_end_tag << unmarked;
|
||||
} else {
|
||||
highlightedss << tokens[i];
|
||||
}
|
||||
|
@ -1312,8 +1312,8 @@ void Index::collate_included_ids(const std::string & query, const std::string &
|
||||
uint32_t inner_pos = index_seq_id.first;
|
||||
uint32_t seq_id = index_seq_id.second;
|
||||
|
||||
uint64_t distinct_id = outer_pos; // outer pos is the group distinct key
|
||||
uint64_t match_score = (64000 - inner_pos); // inner pos within a group is the match score
|
||||
uint64_t distinct_id = outer_pos; // outer pos is the group distinct key
|
||||
uint64_t match_score = (64000 - outer_pos - inner_pos); // both outer pos and inner pos inside group
|
||||
|
||||
// LOG(INFO) << "seq_id: " << seq_id << " - " << match_score;
|
||||
|
||||
|
@ -723,7 +723,7 @@ TEST_F(CollectionFacetingTest, FacetCountOnSimilarStrings) {
|
||||
Collection *coll1;
|
||||
|
||||
std::vector<field> fields = {field("categories", field_types::STRING_ARRAY, true),
|
||||
field("points", field_types::INT32, false)};
|
||||
field("points", field_types::INT32, true)};
|
||||
|
||||
std::vector<sort_by> sort_fields = {sort_by("points", "DESC")};
|
||||
|
||||
@ -747,10 +747,14 @@ TEST_F(CollectionFacetingTest, FacetCountOnSimilarStrings) {
|
||||
|
||||
std::vector<std::string> facets = {"categories"};
|
||||
|
||||
nlohmann::json results = coll1->search("india", {"categories"}, "", facets, sort_fields, 0, 10, 1,
|
||||
nlohmann::json results = coll1->search("*", {"categories"}, "points:[25, 50]", facets, sort_fields, 0, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10).get();
|
||||
|
||||
LOG(INFO) << results;
|
||||
|
||||
return;
|
||||
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
|
||||
|
||||
|
@ -355,6 +355,96 @@ TEST_F(CollectionOverrideTest, IncludeExcludeHitsQuery) {
|
||||
ASSERT_STREQ("6", results["hits"][1]["document"]["id"].get<std::string>().c_str());
|
||||
}
|
||||
|
||||
// Pins three documents at positions 1, 3 and 4 and checks that both result pages come back in the expected order.
TEST_F(CollectionOverrideTest, PinnedHitsSmallerThanPageSize) {
|
||||
std::map<size_t, std::vector<std::string>> pinned_hits;
|
||||
pinned_hits[1] = {"17"}; // key is the 1-based position the document is expected at (see expected_ids_p1)
|
||||
pinned_hits[4] = {"13"};
|
||||
pinned_hits[3] = {"11"};
|
||||
|
||||
// pinned hits smaller than page size: check that ordering and pagination work
|
||||
|
||||
// without overrides:
|
||||
// 11, 16, 6, 8, 1, 0, 10, 4, 13, 17
|
||||
|
||||
// first call: presumably per_page = 8 and page = 1 (8 hits are asserted below) - confirm against Collection::search
auto results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 8, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
|
||||
"", 10,
|
||||
pinned_hits, {}).get();
|
||||
|
||||
// 17, 11 and 13 sit at their pinned positions (1, 3, 4); the other slots keep the un-pinned order listed above
std::vector<size_t> expected_ids_p1 = {17, 16, 11, 13, 6, 8, 1, 0};
|
||||
|
||||
ASSERT_EQ(10, results["found"].get<size_t>());
|
||||
ASSERT_EQ(8, results["hits"].size());
|
||||
|
||||
for(size_t i=0; i<8; i++) {
|
||||
ASSERT_EQ(expected_ids_p1[i], std::stoi(results["hits"][i]["document"]["id"].get<std::string>()));
|
||||
}
|
||||
|
||||
// second call differs only in the 8th argument (2 instead of 1) and must return the remaining two documents
std::vector<size_t> expected_ids_p2 = {10, 4};
|
||||
|
||||
results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 8, 2, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
|
||||
"", 10,
|
||||
pinned_hits, {}).get();
|
||||
|
||||
ASSERT_EQ(10, results["found"].get<size_t>());
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
for(size_t i=0; i<2; i++) {
|
||||
ASSERT_EQ(expected_ids_p2[i], std::stoi(results["hits"][i]["document"]["id"].get<std::string>()));
|
||||
}
|
||||
}
|
||||
|
||||
// Pins four documents at positions 1-4 while each search returns two hits, so the pinned hits span the first two pages.
TEST_F(CollectionOverrideTest, PinnedHitsLargerThanPageSize) {
|
||||
std::map<size_t, std::vector<std::string>> pinned_hits;
|
||||
pinned_hits[1] = {"6"};
|
||||
pinned_hits[2] = {"1"};
|
||||
pinned_hits[3] = {"16"};
|
||||
pinned_hits[4] = {"11"};
|
||||
|
||||
// pinned hits larger than page size: check that pagination works
|
||||
|
||||
// first call: presumably per_page = 2 and page = 1 - confirm against Collection::search; expects pinned positions 1 and 2
auto results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 2, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
|
||||
"", 10,
|
||||
pinned_hits, {}).get();
|
||||
|
||||
ASSERT_EQ(10, results["found"].get<size_t>());
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ASSERT_STREQ("6", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
|
||||
|
||||
// second call (8th argument = 2): expects the documents pinned at positions 3 and 4
results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 2, 2, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
|
||||
"", 10,
|
||||
pinned_hits, {}).get();
|
||||
|
||||
ASSERT_EQ(10, results["found"].get<size_t>());
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ASSERT_STREQ("16", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("11", results["hits"][1]["document"]["id"].get<std::string>().c_str());
|
||||
|
||||
// third call (8th argument = 3): expects ids 8 and 0, neither of which is in pinned_hits
results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 2, 3, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
|
||||
"", 10,
|
||||
pinned_hits, {}).get();
|
||||
|
||||
ASSERT_EQ(10, results["found"].get<size_t>());
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ASSERT_STREQ("8", results["hits"][0]["document"]["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("0", results["hits"][1]["document"]["id"].get<std::string>().c_str());
|
||||
}
|
||||
|
||||
TEST_F(CollectionOverrideTest, PinnedHitsGrouping) {
|
||||
std::map<size_t, std::vector<std::string>> pinned_hits;
|
||||
pinned_hits[1] = {"6", "8"};
|
||||
|
Loading…
x
Reference in New Issue
Block a user