Merge pull request #1293 from krunal1313/value_index_stats

min/max of all matched facets
This commit is contained in:
Kishore Nallan 2023-10-10 16:48:44 +05:30 committed by GitHub
commit 9553ee35b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 102 additions and 2 deletions

View File

@ -56,4 +56,5 @@ public:
size_t& result_ids_len,
uint32_t*& result_ids) const;
// Returns the smallest (first) and largest (second) indexed values that have at least one
// id in common with the `result_ids` array of length `result_ids_len`.
std::pair<int64_t, int64_t> get_min_max(const uint32_t* result_ids, size_t result_ids_len);
};

View File

@ -1354,6 +1354,16 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
}
}
}
if(should_compute_stats) {
auto numerical_index_it = numerical_index.find(a_facet.field_name);
if(numerical_index_it != numerical_index.end()) {
auto min_max_pair = numerical_index_it->second->get_min_max(result_ids,
results_size);
a_facet.stats.fvmin = int64_t_to_float(min_max_pair.first);
a_facet.stats.fvmax = int64_t_to_float(min_max_pair.second);
}
}
} else {
//LOG(INFO) << "Using hashing to find facets";
bool facet_hash_index_exists = facet_index_v4->has_hash_index(facet_field.name);

View File

@ -341,6 +341,26 @@ void num_tree_t::seq_ids_outside_top_k(size_t k, std::vector<uint32_t> &seq_ids)
}
}
std::pair<int64_t, int64_t> num_tree_t::get_min_max(const uint32_t* result_ids, size_t result_ids_len) {
int64_t min, max;
//first traverse from top to find min
for(auto int64map_it = int64map.begin(); int64map_it != int64map.end(); ++int64map_it) {
if(ids_t::intersect_count(int64map_it->second, result_ids, result_ids_len)) {
min = int64map_it->first;
break;
}
}
//traverse from end to find max
for(auto int64map_it = int64map.rbegin(); int64map_it != int64map.rend(); ++int64map_it) {
if(ids_t::intersect_count(int64map_it->second, result_ids, result_ids_len)) {
max = int64map_it->first;
break;
}
}
return std::make_pair(min, max);
}
size_t num_tree_t::size() {
return int64map.size();

View File

@ -315,8 +315,8 @@ TEST_F(CollectionOptimizedFacetingTest, FacetCounts) {
ASSERT_EQ(5, results["facet_counts"][0]["stats"].size());
ASSERT_FLOAT_EQ(7.812, results["facet_counts"][0]["stats"]["avg"].get<double>());
ASSERT_FLOAT_EQ(7.812, results["facet_counts"][0]["stats"]["min"].get<double>());
ASSERT_FLOAT_EQ(7.812, results["facet_counts"][0]["stats"]["max"].get<double>());
ASSERT_FLOAT_EQ(0, results["facet_counts"][0]["stats"]["min"].get<double>());
ASSERT_FLOAT_EQ(9.9989996, results["facet_counts"][0]["stats"]["max"].get<double>());
ASSERT_FLOAT_EQ(7.812, results["facet_counts"][0]["stats"]["sum"].get<double>());
ASSERT_FLOAT_EQ(1, results["facet_counts"][0]["stats"]["total_values"].get<size_t>());
@ -2125,4 +2125,73 @@ TEST_F(CollectionOptimizedFacetingTest, FacetSortValidation) {
ASSERT_EQ("Fusion Plus", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ("Oneplus 11R", results["facet_counts"][0]["counts"][1]["value"]);
ASSERT_EQ("S22 Ultra", results["facet_counts"][0]["counts"][2]["value"]);
}
// Verifies the facet stats returned when faceting on a float field with the VALUE index:
// the asserted min/max span all five matched documents, while avg/sum/total_values match
// only the two facet values that are returned.
TEST_F(CollectionOptimizedFacetingTest, ValueIndexStatsMinMax) {
    std::vector<field> fields = {field("title", field_types::STRING, false),
                                 field("rating", field_types::FLOAT, true)};

    std::vector<sort_by> sort_fields = {sort_by("rating", "DESC")};

    // Reuse the collection if it already exists, otherwise create it.
    Collection* coll1 = collectionManager.get_collection("coll1").get();
    if(coll1 == nullptr) {
        coll1 = collectionManager.create_collection("coll1", 4, fields, "rating").get();
    }

    // Builds one movie document and indexes it. The rating is taken as JSON so that the
    // numeric type of each literal below (floating point vs. integer) is kept as written.
    const auto index_movie = [coll1](const char* id, const char* title, const nlohmann::json& rating) {
        nlohmann::json movie;
        movie["id"] = id;
        movie["title"] = title;
        movie["rating"] = rating;
        coll1->add(movie.dump());
    };

    index_movie("0", "The Shawshank Redemption", 9.3);
    index_movie("1", "The Godfather", 9.2);
    index_movie("2", "The Dark Knight", 9);
    index_movie("3", "Pulp Fiction", 8.9);
    index_movie("4", "Fight Club", 8.8);

    std::vector<std::string> facets = {"rating"};

    //only the top 2 facet values are requested
    nlohmann::json results = coll1->search("*", {"title"}, "", facets, sort_fields, {0}, 10, 1,
                                           token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
                                           spp::sparse_hash_set<std::string>(), 2,"", 30UL, 4UL,
                                           "", 1UL, "", "", {}, 3UL, "<mark>", "</mark>", {},
                                           4294967295UL, true, false, true, "", false, 6000000UL, 4UL,
                                           7UL, fallback, 4UL, {off}, 32767UL, 32767UL, 2UL, 2UL, false,
                                           "", true, 0UL, max_score, 100UL, 0UL, 4294967295UL, VALUE).get();

    ASSERT_EQ(1, results["facet_counts"].size());

    nlohmann::json& rating_facet = results["facet_counts"][0];
    nlohmann::json& counts = rating_facet["counts"];

    ASSERT_EQ(2, counts.size());
    ASSERT_EQ("9.3", counts[0]["value"]);
    ASSERT_EQ("9.2", counts[1]["value"]);

    //stats
    nlohmann::json& stats = rating_facet["stats"];

    ASSERT_EQ(5, stats.size());
    ASSERT_FLOAT_EQ(9.25, stats["avg"].get<double>());
    ASSERT_FLOAT_EQ(8.800000190734863, stats["min"].get<double>());
    ASSERT_FLOAT_EQ(9.300000190734863, stats["max"].get<double>());
    ASSERT_FLOAT_EQ(18.5, stats["sum"].get<double>());
    ASSERT_FLOAT_EQ(2, stats["total_values"].get<size_t>());
}