Fix grouping in hybrid search rank fusion

This commit is contained in:
ozanarmagan 2023-09-27 11:38:04 +03:00
parent 0050134f1c
commit a99baf7e0a
2 changed files with 19 additions and 4 deletions

View File

@ -3187,11 +3187,26 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
return a.second < b.second;
});
std::vector<KV*> kvs;
if(group_limit != 0) {
for(auto& kv_map : topster->group_kv_map) {
for(int i = 0; i < kv_map.second->size; i++) {
kvs.push_back(kv_map.second->getKV(i));
}
}
} else {
for(int i = 0; i < topster->size; i++) {
kvs.push_back(topster->getKV(i));
}
}
std::sort(kvs.begin(), kvs.end(), Topster::is_greater);
topster->sort();
// Reciprocal rank fusion
// Score is sum of (1 / rank_of_document) * WEIGHT from each list (text match and vector search)
for(uint32_t i = 0; i < topster->size; i++) {
auto result = topster->getKV(i);
for(uint32_t i = 0; i < kvs.size(); i++) {
auto result = kvs[i];
if(result->match_score_index < 0 || result->match_score_index > 2) {
continue;
}

View File

@ -1830,13 +1830,13 @@ TEST_F(CollectionVectorTest, GroupByWithVectorSearch) {
auto res = coll1->search("title", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {"group"}, 1,
"", 10, {}, {}, {"group"}, 3,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 32767, 32767, 2,
false, true, "vec:([0.96826, 0.94, 0.39557, 0.306488])").get();
ASSERT_EQ(1, res["grouped_hits"].size());
ASSERT_EQ(1, res["grouped_hits"][0]["hits"].size());
ASSERT_EQ(3, res["grouped_hits"][0]["hits"].size());
ASSERT_EQ(1, res["grouped_hits"][0]["hits"][0].count("vector_distance"));
res = coll1->search("*", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,