From cc46d213fb837348a209caaabde521055a624acc Mon Sep 17 00:00:00 2001
From: krunal
Date: Tue, 10 Oct 2023 15:42:16 +0530
Subject: [PATCH] min/max of all matched facets

---
Review notes:
- num_tree_t::get_min_max() returned indeterminate values when no key matched any
  result id; it now returns {INT64_MAX, INT64_MIN} and Index::do_facets() skips the
  stats update in that case.
- Indentation and template arguments (e.g. get<double>()) were missing from this copy
  of the patch and have been restored by hand; verify the context lines against the
  tree (`git apply --ignore-whitespace` may be needed). The post-image blob ids on the
  "index" lines predate the changes described above.

 include/num_tree.h                          |  1 +
 src/index.cpp                               | 14 ++++
 src/num_tree.cpp                            | 26 ++++++++
 test/collection_optimized_faceting_test.cpp | 73 ++++++++++++++++++++-
 4 files changed, 112 insertions(+), 2 deletions(-)

diff --git a/include/num_tree.h b/include/num_tree.h
index 30b2aaf4..5315922a 100644
--- a/include/num_tree.h
+++ b/include/num_tree.h
@@ -56,4 +56,5 @@ public:
                   size_t& result_ids_len,
                   uint32_t*& result_ids) const;
 
+    std::pair<int64_t, int64_t> get_min_max(const uint32_t* result_ids, size_t result_ids_len);
 };
\ No newline at end of file
diff --git a/src/index.cpp b/src/index.cpp
index f3772be1..8c7c8725 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1354,6 +1354,20 @@ void Index::do_facets(std::vector & facets, facet_query_t & facet_query,
                     }
                 }
             }
+
+            if(should_compute_stats) {
+                auto numerical_index_it = numerical_index.find(a_facet.field_name);
+                if(numerical_index_it != numerical_index.end()) {
+                    auto min_max_pair = numerical_index_it->second->get_min_max(result_ids,
+                                                                                  results_size);
+                    // first > second is get_min_max's "nothing matched" result:
+                    // leave fvmin/fvmax untouched rather than publishing sentinel values
+                    if(min_max_pair.first <= min_max_pair.second) {
+                        a_facet.stats.fvmin = int64_t_to_float(min_max_pair.first);
+                        a_facet.stats.fvmax = int64_t_to_float(min_max_pair.second);
+                    }
+                }
+            }
         } else {
             //LOG(INFO) << "Using hashing to find facets";
             bool facet_hash_index_exists = facet_index_v4->has_hash_index(facet_field.name);
diff --git a/src/num_tree.cpp b/src/num_tree.cpp
index 94aa7b97..3e142212 100644
--- a/src/num_tree.cpp
+++ b/src/num_tree.cpp
@@ -341,6 +341,32 @@ void num_tree_t::seq_ids_outside_top_k(size_t k, std::vector &seq_ids)
     }
 }
 
+// Returns {min, max}: the smallest and the largest key of int64map whose ids intersect result_ids.
+// Relies on int64map iterating its keys in ascending order. When nothing intersects (e.g. empty
+// map or no overlapping id) the result is {INT64_MAX, INT64_MIN}, i.e. first > second, so callers
+// can detect the "no match" case instead of reading indeterminate values.
+std::pair<int64_t, int64_t> num_tree_t::get_min_max(const uint32_t* result_ids, size_t result_ids_len) {
+    int64_t min = INT64_MAX;
+    int64_t max = INT64_MIN;
+
+    // forward walk: the first key with a matching id is the minimum
+    for(auto int64map_it = int64map.begin(); int64map_it != int64map.end(); ++int64map_it) {
+        if(ids_t::intersect_count(int64map_it->second, result_ids, result_ids_len)) {
+            min = int64map_it->first;
+            break;
+        }
+    }
+
+    // reverse walk: the first key with a matching id is the maximum
+    for(auto int64map_it = int64map.rbegin(); int64map_it != int64map.rend(); ++int64map_it) {
+        if(ids_t::intersect_count(int64map_it->second, result_ids, result_ids_len)) {
+            max = int64map_it->first;
+            break;
+        }
+    }
+
+    return std::make_pair(min, max);
+}
 
 size_t num_tree_t::size() {
     return int64map.size();
diff --git a/test/collection_optimized_faceting_test.cpp b/test/collection_optimized_faceting_test.cpp
index 86b94238..bf6ad1a4 100644
--- a/test/collection_optimized_faceting_test.cpp
+++ b/test/collection_optimized_faceting_test.cpp
@@ -315,8 +315,8 @@ TEST_F(CollectionOptimizedFacetingTest, FacetCounts) {
 
     ASSERT_EQ(5, results["facet_counts"][0]["stats"].size());
     ASSERT_FLOAT_EQ(7.812, results["facet_counts"][0]["stats"]["avg"].get<double>());
-    ASSERT_FLOAT_EQ(7.812, results["facet_counts"][0]["stats"]["min"].get<double>());
-    ASSERT_FLOAT_EQ(7.812, results["facet_counts"][0]["stats"]["max"].get<double>());
+    ASSERT_FLOAT_EQ(0, results["facet_counts"][0]["stats"]["min"].get<double>());
+    ASSERT_FLOAT_EQ(9.9989996, results["facet_counts"][0]["stats"]["max"].get<double>());
     ASSERT_FLOAT_EQ(7.812, results["facet_counts"][0]["stats"]["sum"].get<double>());
     ASSERT_FLOAT_EQ(1, results["facet_counts"][0]["stats"]["total_values"].get<size_t>());
 
@@ -2125,4 +2125,73 @@ TEST_F(CollectionOptimizedFacetingTest, FacetSortValidation) {
     ASSERT_EQ("Fusion Plus", results["facet_counts"][0]["counts"][0]["value"]);
     ASSERT_EQ("Oneplus 11R", results["facet_counts"][0]["counts"][1]["value"]);
     ASSERT_EQ("S22 Ultra", results["facet_counts"][0]["counts"][2]["value"]);
+}
+
+TEST_F(CollectionOptimizedFacetingTest, ValueIndexStatsMinMax) {
+    Collection *coll1;
+
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("rating", field_types::FLOAT, true)};
+
+    std::vector<sort_by> sort_fields = {sort_by("rating", "DESC")};
+
+    coll1 = collectionManager.get_collection("coll1").get();
+    if (coll1 == nullptr) {
+        coll1 = collectionManager.create_collection("coll1", 4, fields, "rating").get();
+    }
+
+    nlohmann::json doc;
+    doc["id"] = "0";
+    doc["title"] = "The Shawshank Redemption";
+    doc["rating"] = 9.3;
+
+    coll1->add(doc.dump());
+
+    doc["id"] = "1";
+    doc["title"] = "The Godfather";
+    doc["rating"] = 9.2;
+
+    coll1->add(doc.dump());
+
+    doc["id"] = "2";
+    doc["title"] = "The Dark Knight";
+    doc["rating"] = 9;
+
+    coll1->add(doc.dump());
+
+    doc["id"] = "3";
+    doc["title"] = "Pulp Fiction";
+    doc["rating"] = 8.9;
+
+    coll1->add(doc.dump());
+
+    doc["id"] = "4";
+    doc["title"] = "Fight Club";
+    doc["rating"] = 8.8;
+
+    coll1->add(doc.dump());
+
+    std::vector<std::string> facets = {"rating"};
+
+    //limit max facets to 2
+    nlohmann::json results = coll1->search("*", {"title"}, "", facets, sort_fields, {0}, 10, 1,
+                                           token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                                           spp::sparse_hash_set<std::string>(), 2,"", 30UL, 4UL,
+                                           "", 1UL, "", "", {}, 3UL, "", "", {},
+                                           4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
+                                           7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
+                                           "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, VALUE).get();
+
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
+    ASSERT_EQ("9.3", results["facet_counts"][0]["counts"][0]["value"]);
+    ASSERT_EQ("9.2", results["facet_counts"][0]["counts"][1]["value"]);
+
+    //stats
+    ASSERT_EQ(5, results["facet_counts"][0]["stats"].size());
+    ASSERT_FLOAT_EQ(9.25, results["facet_counts"][0]["stats"]["avg"].get<double>());
+    ASSERT_FLOAT_EQ(8.800000190734863, results["facet_counts"][0]["stats"]["min"].get<double>());
+    ASSERT_FLOAT_EQ(9.300000190734863, results["facet_counts"][0]["stats"]["max"].get<double>());
+    ASSERT_FLOAT_EQ(18.5, results["facet_counts"][0]["stats"]["sum"].get<double>());
+    ASSERT_FLOAT_EQ(2, results["facet_counts"][0]["stats"]["total_values"].get<size_t>());
 }
\ No newline at end of file