From d1079c633c7372d2cae1ed77a26291acdff95a88 Mon Sep 17 00:00:00 2001 From: Krunal Gandhi Date: Thu, 22 Feb 2024 13:17:42 +0000 Subject: [PATCH] fix pinned hits with grouping and filter (#1572) * fix pinned_hits_with_grouping * remove repeated group_limit check --- include/field.h | 4 +- src/collection.cpp | 6 +- src/index.cpp | 24 +++++-- test/collection_grouping_test.cpp | 112 ++++++++++++++++++++++++++++++ 4 files changed, 137 insertions(+), 9 deletions(-) diff --git a/include/field.h b/include/field.h index 2a6f00e3..19d30926 100644 --- a/include/field.h +++ b/include/field.h @@ -761,9 +761,9 @@ struct facet { return false; } - explicit facet(const std::string& field_name, std::map facet_range = {}, + explicit facet(const std::string& field_name, uint32_t orig_index, std::map facet_range = {}, bool is_range_q = false, bool sort_by_alpha=false, const std::string& order="", - const std::string& sort_by_field="", uint32_t orig_index = 0) + const std::string& sort_by_field="") : field_name(field_name), facet_range_map(facet_range), is_range_query(is_range_q), is_sort_by_alpha(sort_by_alpha), sort_order(order), sort_field(sort_by_field), orig_index(orig_index) { diff --git a/src/collection.cpp b/src/collection.cpp index 6947a63d..1a4a2369 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -6059,7 +6059,7 @@ Option Collection::parse_facet(const std::string& facet_field, std::vector return Option(400, error); } - facet a_facet(field_name); + facet a_facet(field_name, facets.size()); //starting after "(" and excluding ")" auto range_string = std::string(facet_field.begin() + startpos + 1, facet_field.end() - 1); @@ -6209,7 +6209,7 @@ Option Collection::parse_facet(const std::string& facet_field, std::vector // Collect the fields that match the prefix and are marked as facet. for (auto field = pair.first; field != pair.second; field++) { if (field->facet) { - facets.emplace_back(facet(field->name)); + facets.emplace_back(facet(field->name, facets.size())); facets.back().is_wildcard_match = true; } } @@ -6278,7 +6278,7 @@ Option Collection::parse_facet(const std::string& facet_field, std::vector return Option(400, error); } - facets.emplace_back(facet(facet_field_copy, {}, false, sort_alpha, + facets.emplace_back(facet(facet_field_copy, facets.size(), {}, false, sort_alpha, order, sort_field)); } diff --git a/src/index.cpp b/src/index.cpp index 7497f526..daf21c1b 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -3661,16 +3661,16 @@ Option Index::search(std::vector& field_query_tokens, cons if(facet_infos[i].use_value_index) { #endif // value based faceting on a single thread - value_facets.emplace_back(this_facet.field_name, this_facet.facet_range_map, + value_facets.emplace_back(this_facet.field_name, this_facet.orig_index, this_facet.facet_range_map, this_facet.is_range_query, this_facet.is_sort_by_alpha, - this_facet.sort_order, this_facet.sort_field, i); + this_facet.sort_order, this_facet.sort_field); continue; } for(size_t j = 0; j < num_threads; j++) { - facet_batches[j].emplace_back(this_facet.field_name, this_facet.facet_range_map, + facet_batches[j].emplace_back(this_facet.field_name, this_facet.orig_index, this_facet.facet_range_map, this_facet.is_range_query, this_facet.is_sort_by_alpha, - this_facet.sort_order, this_facet.sort_field, i); + this_facet.sort_order, this_facet.sort_field); } } @@ -3785,6 +3785,22 @@ Option Index::search(std::vector& field_query_tokens, cons all_result_ids_len += curated_topster->size; + if(!included_ids_map.empty() && group_limit != 0) { + for (auto &acc_facet: facets) { + for (auto &facet_kv: acc_facet.result_map) { + facet_kv.second.count = acc_facet.hash_groups[facet_kv.first].size(); + + if (estimate_facets) { + facet_kv.second.count = size_t(double(facet_kv.second.count) * (100.0f / facet_sample_percent)); + } + } + + if (estimate_facets) { + acc_facet.sampled = true; + } + } + } + delete [] all_result_ids; //LOG(INFO) << "all_result_ids_len " << all_result_ids_len << " for index " << name; diff --git a/test/collection_grouping_test.cpp b/test/collection_grouping_test.cpp index 8a69c55f..d518f89b 100644 --- a/test/collection_grouping_test.cpp +++ b/test/collection_grouping_test.cpp @@ -1045,4 +1045,116 @@ TEST_F(CollectionGroupingTest, GroupByMultipleFacetFields) { ASSERT_EQ(1, (int) res["facet_counts"][1]["counts"][2]["count"]); ASSERT_STREQ("red", res["facet_counts"][1]["counts"][2]["value"].get().c_str()); +} + +TEST_F(CollectionGroupingTest, GroupByMultipleFacetFieldsWithFilter) { + auto res = coll_group->search("*", {}, "size:>10", {"colors", "brand"}, {}, {0}, 50, 1, FREQUENCY, + {false}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10, + {}, {}, {"size"}, 2).get(); + + ASSERT_EQ(5, res["found_docs"].get()); + ASSERT_EQ(2, res["found"].get()); + ASSERT_EQ(2, res["grouped_hits"].size()); + + ASSERT_EQ(11, res["grouped_hits"][0]["group_key"][0].get()); + ASSERT_EQ(2, res["grouped_hits"][0]["found"].get()); + ASSERT_EQ(2, res["grouped_hits"][0]["hits"].size()); + ASSERT_EQ("5", res["grouped_hits"][0]["hits"][0]["document"]["id"]); + ASSERT_FLOAT_EQ(4.8, res["grouped_hits"][0]["hits"][0]["document"]["rating"].get()); + ASSERT_EQ("1", res["grouped_hits"][0]["hits"][1]["document"]["id"]); + ASSERT_FLOAT_EQ(4.3, res["grouped_hits"][0]["hits"][1]["document"]["rating"].get()); + + ASSERT_EQ(12, res["grouped_hits"][1]["group_key"][0].get()); + ASSERT_EQ(3, res["grouped_hits"][1]["found"].get()); + ASSERT_EQ(2, res["grouped_hits"][1]["hits"].size()); + ASSERT_EQ("2", res["grouped_hits"][1]["hits"][0]["document"]["id"]); + ASSERT_FLOAT_EQ(4.6, res["grouped_hits"][1]["hits"][0]["document"]["rating"].get()); + ASSERT_EQ("8", res["grouped_hits"][1]["hits"][1]["document"]["id"]); + ASSERT_FLOAT_EQ(4.4, res["grouped_hits"][1]["hits"][1]["document"]["rating"].get()); + + ASSERT_STREQ("colors", res["facet_counts"][0]["field_name"].get().c_str()); + + ASSERT_EQ(2, (int) res["facet_counts"][0]["counts"][0]["count"]); + ASSERT_STREQ("blue", res["facet_counts"][0]["counts"][0]["value"].get().c_str()); + + ASSERT_EQ(2, (int) res["facet_counts"][0]["counts"][1]["count"]); + ASSERT_STREQ("white", res["facet_counts"][0]["counts"][1]["value"].get().c_str()); + + ASSERT_EQ(1, (int) res["facet_counts"][0]["counts"][2]["count"]); + ASSERT_STREQ("red", res["facet_counts"][0]["counts"][2]["value"].get().c_str()); + + ASSERT_STREQ("brand", res["facet_counts"][1]["field_name"].get().c_str()); + + ASSERT_EQ(2, (int) res["facet_counts"][1]["counts"][0]["count"]); + ASSERT_STREQ("Beta", res["facet_counts"][1]["counts"][0]["value"].get().c_str()); + + ASSERT_EQ(2, (int) res["facet_counts"][1]["counts"][1]["count"]); + ASSERT_STREQ("Omega", res["facet_counts"][1]["counts"][1]["value"].get().c_str()); + + ASSERT_EQ(1, (int) res["facet_counts"][1]["counts"][2]["count"]); + ASSERT_STREQ("Xorp", res["facet_counts"][1]["counts"][2]["value"].get().c_str()); +} + +TEST_F(CollectionGroupingTest, GroupByMultipleFacetFieldsWithPinning) { + auto res = coll_group->search("*", {}, "size:>10", {"colors", "brand"}, {}, {0}, 50, 1, FREQUENCY, + {false}, Index::DROP_TOKENS_THRESHOLD, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 5, + "", 10, + {"3:1,4:2"}, {}, {"size"}, 2).get(); + + ASSERT_EQ(5, res["found_docs"].get()); + ASSERT_EQ(4, res["found"].get()); + ASSERT_EQ(4, res["grouped_hits"].size()); + + ASSERT_EQ(10, res["grouped_hits"][0]["group_key"][0].get()); + ASSERT_EQ(1, res["grouped_hits"][0]["hits"].size()); + ASSERT_EQ("3", res["grouped_hits"][0]["hits"][0]["document"]["id"]); + ASSERT_FLOAT_EQ(4.6, res["grouped_hits"][0]["hits"][0]["document"]["rating"].get()); + + ASSERT_EQ(10, res["grouped_hits"][1]["group_key"][0].get()); + ASSERT_EQ(1, res["grouped_hits"][1]["hits"].size()); + ASSERT_EQ("4", res["grouped_hits"][1]["hits"][0]["document"]["id"]); + ASSERT_FLOAT_EQ(4.8, res["grouped_hits"][1]["hits"][0]["document"]["rating"].get()); + + ASSERT_EQ(11, res["grouped_hits"][2]["group_key"][0].get()); + ASSERT_EQ(2, res["grouped_hits"][2]["found"].get()); + ASSERT_EQ(2, res["grouped_hits"][2]["hits"].size()); + ASSERT_EQ("5", res["grouped_hits"][2]["hits"][0]["document"]["id"]); + ASSERT_FLOAT_EQ(4.8, res["grouped_hits"][2]["hits"][0]["document"]["rating"].get()); + ASSERT_EQ("1", res["grouped_hits"][2]["hits"][1]["document"]["id"]); + ASSERT_FLOAT_EQ(4.3, res["grouped_hits"][2]["hits"][1]["document"]["rating"].get()); + + ASSERT_EQ(12, res["grouped_hits"][3]["group_key"][0].get()); + ASSERT_EQ(3, res["grouped_hits"][3]["found"].get()); + ASSERT_EQ(2, res["grouped_hits"][3]["hits"].size()); + ASSERT_EQ("2", res["grouped_hits"][3]["hits"][0]["document"]["id"]); + ASSERT_FLOAT_EQ(4.6, res["grouped_hits"][3]["hits"][0]["document"]["rating"].get()); + ASSERT_EQ("8", res["grouped_hits"][3]["hits"][1]["document"]["id"]); + ASSERT_FLOAT_EQ(4.4, res["grouped_hits"][3]["hits"][1]["document"]["rating"].get()); + + ASSERT_STREQ("colors", res["facet_counts"][0]["field_name"].get().c_str()); + + ASSERT_EQ(3, (int) res["facet_counts"][0]["counts"][0]["count"]); + ASSERT_STREQ("blue", res["facet_counts"][0]["counts"][0]["value"].get().c_str()); + + ASSERT_EQ(3, (int) res["facet_counts"][0]["counts"][1]["count"]); + ASSERT_STREQ("white", res["facet_counts"][0]["counts"][1]["value"].get().c_str()); + + ASSERT_EQ(1, (int) res["facet_counts"][0]["counts"][2]["count"]); + ASSERT_STREQ("red", res["facet_counts"][0]["counts"][2]["value"].get().c_str()); + + ASSERT_STREQ("brand", res["facet_counts"][1]["field_name"].get().c_str()); + + ASSERT_EQ(3, (int) res["facet_counts"][1]["counts"][0]["count"]); + ASSERT_STREQ("Beta", res["facet_counts"][1]["counts"][0]["value"].get().c_str()); + + ASSERT_EQ(3, (int) res["facet_counts"][1]["counts"][1]["count"]); + ASSERT_STREQ("Omega", res["facet_counts"][1]["counts"][1]["value"].get().c_str()); + + ASSERT_EQ(1, (int) res["facet_counts"][1]["counts"][2]["count"]); + ASSERT_STREQ("Xorp", res["facet_counts"][1]["counts"][2]["value"].get().c_str()); } \ No newline at end of file