Merge pull request #1052 from krunal1313/doc_count_with_grouping

Adding a found documents count to results when group_by is enabled
Kishore Nallan 2023-06-08 20:37:14 +05:30 committed by GitHub
commit 6b418318b2
2 changed files with 22 additions and 6 deletions
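
In short: when group_by is used, "found" reports the number of groups, while the new "found_docs" field reports the total number of matching documents across all groups. A minimal sketch of how a client might read both fields from a parsed response (the handler function here is hypothetical; only the field names and the grouping behaviour come from this change):

    #include <cstddef>
    #include <iostream>
    #include <nlohmann/json.hpp>

    // Hypothetical client-side handler; `res` is assumed to be the parsed
    // JSON object returned by a search issued with group_by set.
    void print_counts(const nlohmann::json& res) {
        // "found" counts groups when grouping is enabled.
        std::cout << "found: " << res["found"].get<size_t>() << "\n";

        // "found_docs" is only emitted for grouped searches and counts the
        // underlying matching documents across all groups.
        if(res.contains("found_docs")) {
            std::cout << "found_docs: " << res["found_docs"].get<size_t>() << "\n";
        }
    }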

src/collection.cpp

@@ -1384,7 +1384,7 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
     std::vector<std::vector<KV*>> raw_result_kvs;
     std::vector<std::vector<KV*>> override_result_kvs;

-    size_t total_found = 0;
+    size_t total = 0;

     std::vector<uint32_t> excluded_ids;
     std::vector<std::pair<uint32_t, uint32_t>> included_ids; // ID -> position
@@ -1557,12 +1557,13 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
     // for grouping we have to aggregate group set sizes to a count value
     if(group_limit) {
-        total_found = search_params->groups_processed.size() + override_result_kvs.size();
+        total = search_params->groups_processed.size() + override_result_kvs.size();
     } else {
-        total_found = search_params->all_result_ids_len;
+        total = search_params->all_result_ids_len;
     }

-    if(search_cutoff && total_found == 0) {
+    if(search_cutoff && total == 0) {
         // this can happen if other requests stopped this request from being processed
         // we should return an error so that request can be retried by client
         return Option<nlohmann::json>(408, "Request Timeout");
@@ -1683,7 +1684,10 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
     }

     nlohmann::json result = nlohmann::json::object();
-    result["found"] = total_found;
+    result["found"] = total;
+    if(group_limit != 0) {
+        result["found_docs"] = search_params->all_result_ids_len;
+    }

     if(exclude_fields.count("out_of") == 0) {
         result["out_of"] = num_documents.load();

test/collection_grouping_test.cpp

@@ -69,6 +69,7 @@ TEST_F(CollectionGroupingTest, GroupingBasics) {
                                   "", 10,
                                   {}, {}, {"size"}, 2).get();

+    ASSERT_EQ(12, res["found_docs"].get<size_t>());
     ASSERT_EQ(3, res["found"].get<size_t>());
     ASSERT_EQ(3, res["grouped_hits"].size());
     ASSERT_EQ(11, res["grouped_hits"][0]["group_key"][0].get<size_t>());
@@ -116,6 +117,7 @@ TEST_F(CollectionGroupingTest, GroupingBasics) {
                                   {}, {}, {"rating"}, 2).get();

     // 7 unique ratings
+    ASSERT_EQ(12, res["found_docs"].get<size_t>());
     ASSERT_EQ(7, res["found"].get<size_t>());
     ASSERT_EQ(7, res["grouped_hits"].size());
     ASSERT_FLOAT_EQ(4.4, res["grouped_hits"][0]["group_key"][0].get<float>());
@@ -167,7 +169,7 @@ TEST_F(CollectionGroupingTest, GroupingCompoundKey) {
                                   spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
                                   "", 10,
                                   {}, {}, {"size", "brand"}, 2).get();

+    ASSERT_EQ(12, res["found_docs"].get<size_t>());
     ASSERT_EQ(10, res["found"].get<size_t>());
     ASSERT_EQ(10, res["grouped_hits"].size());
@@ -227,6 +229,7 @@ TEST_F(CollectionGroupingTest, GroupingCompoundKey) {
     ASSERT_STREQ("0", res["grouped_hits"][0]["hits"][1]["document"]["id"].get<std::string>().c_str());

     // total count and facet counts should be the same
+    ASSERT_EQ(12, res["found_docs"].get<size_t>());
     ASSERT_EQ(10, res["found"].get<size_t>());
     ASSERT_EQ(2, res["grouped_hits"].size());
     ASSERT_EQ(10, res["grouped_hits"][0]["group_key"][0].get<size_t>());
@@ -313,6 +316,7 @@ TEST_F(CollectionGroupingTest, GroupingWithMultiFieldRelevance) {
                                   "", 10,
                                   {}, {}, {"genre"}, 2).get();

+    ASSERT_EQ(7, results["found_docs"].get<size_t>());
     ASSERT_EQ(3, results["found"].get<size_t>());
     ASSERT_EQ(3, results["grouped_hits"].size());
@@ -345,6 +349,7 @@ TEST_F(CollectionGroupingTest, GroupingWithGropLimitOfOne) {
                                   "", 10,
                                   {}, {}, {"brand"}, 1).get();

+    ASSERT_EQ(12, res["found_docs"].get<size_t>());
     ASSERT_EQ(5, res["found"].get<size_t>());
     ASSERT_EQ(5, res["grouped_hits"].size());
@@ -430,6 +435,7 @@ TEST_F(CollectionGroupingTest, GroupingWithArrayFieldAndOverride) {
                                   "", 10,
                                   {}, {}, {"colors"}, 2).get();

+    ASSERT_EQ(9, res["found_docs"].get<size_t>());
     ASSERT_EQ(4, res["found"].get<size_t>());
     ASSERT_EQ(4, res["grouped_hits"].size());
@@ -611,6 +617,7 @@ TEST_F(CollectionGroupingTest, SortingOnGroupCount) {
                                   "", 10,
                                   {}, {}, {"size"}, 2).get();

+    ASSERT_EQ(12, res["found_docs"].get<size_t>());
     ASSERT_EQ(3, res["found"].get<size_t>());
     ASSERT_EQ(3, res["grouped_hits"].size());
@@ -635,6 +642,7 @@ TEST_F(CollectionGroupingTest, SortingOnGroupCount) {
                                   "", 10,
                                   {}, {}, {"size"}, 2).get();

+    ASSERT_EQ(12, res2["found_docs"].get<size_t>());
     ASSERT_EQ(3, res2["found"].get<size_t>());
     ASSERT_EQ(3, res2["grouped_hits"].size());
@@ -715,6 +723,7 @@ TEST_F(CollectionGroupingTest, SortingMoreThanMaxTopsterSize) {
                                   "", 10,
                                   {}, {}, {"size"}, 2).get();

+    ASSERT_EQ(1000, res["found_docs"].get<size_t>());
     ASSERT_EQ(300, res["found"].get<size_t>());
     ASSERT_EQ(100, res["grouped_hits"].size());
@@ -734,6 +743,7 @@ TEST_F(CollectionGroupingTest, SortingMoreThanMaxTopsterSize) {
                                   "", 10,
                                   {}, {}, {"size"}, 2).get();

+    ASSERT_EQ(1000, res["found_docs"].get<size_t>());
     ASSERT_EQ(300, res["found"].get<size_t>());
     ASSERT_EQ(100, res["grouped_hits"].size());
@@ -757,6 +767,7 @@ TEST_F(CollectionGroupingTest, SortingMoreThanMaxTopsterSize) {
                                   "", 10,
                                   {}, {}, {"size"}, 2).get();

+    ASSERT_EQ(1000, res2["found_docs"].get<size_t>());
     ASSERT_EQ(300, res2["found"].get<size_t>());
     ASSERT_EQ(100, res2["grouped_hits"].size());
@@ -775,6 +786,7 @@ TEST_F(CollectionGroupingTest, SortingMoreThanMaxTopsterSize) {
                                   "", 10,
                                   {}, {}, {"size"}, 2).get();

+    ASSERT_EQ(1000, res2["found_docs"].get<size_t>())
     ASSERT_EQ(300, res2["found"].get<size_t>());
     ASSERT_EQ(100, res2["grouped_hits"].size());
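
A quick sanity check on the SortingMoreThanMaxTopsterSize expectations above: 1000 matching documents collapse into 300 distinct size groups, and grouped_hits holds only 100 entries, suggesting a page size of 100. A hypothetical helper (not part of the test suite) capturing that relationship:

    #include <algorithm>
    #include <cstddef>

    // The page of grouped hits can never exceed the number of groups found.
    size_t expected_grouped_hits(size_t groups_found, size_t per_page) {
        return std::min(groups_found, per_page);
    }

    // e.g. expected_grouped_hits(300, 100) == 100, matching the assertions above.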