Add stats support for int64 facets

This commit is contained in:
krunal 2023-09-28 15:50:40 +05:30
parent 2b62657b3c
commit f4fca28622
4 changed files with 36 additions and 9 deletions

View File

@ -109,7 +109,7 @@ private:
// field -> facet_index
std::unordered_map<std::string, facet_doc_ids_list_t> facet_field_map;
std::unordered_map<uint32_t, int64_t> index_to_int64_map;
// auto incrementing ID that is assigned to each unique facet value string
uint32_t next_facet_id = 0;
@ -121,7 +121,8 @@ public:
void insert(const std::string& field_name, std::unordered_map<facet_value_id_t,
std::vector<uint32_t>, facet_value_id_t::Hash>& fvalue_to_seq_ids,
std::unordered_map<uint32_t, std::vector<facet_value_id_t>>& seq_id_to_fvalues);
std::unordered_map<uint32_t, std::vector<facet_value_id_t>>& seq_id_to_fvalues,
bool is_string_field = false);
void erase(const std::string& field_name);
@ -150,4 +151,7 @@ public:
bool has_value_index(const std::string& field_name);
posting_list_t* get_facet_hash_index(const std::string& field_name);
// get the int64 value stored for a facet index (used when computing stats)
int64_t get_facet_val(uint32_t index);
};

View File

@ -13,7 +13,8 @@ void facet_index_t::initialize(const std::string& field) {
void facet_index_t::insert(const std::string& field_name,std::unordered_map<facet_value_id_t,
std::vector<uint32_t>, facet_value_id_t::Hash>& fvalue_to_seq_ids,
std::unordered_map<uint32_t, std::vector<facet_value_id_t>>& seq_id_to_fvalues) {
std::unordered_map<uint32_t, std::vector<facet_value_id_t>>& seq_id_to_fvalues,
bool is_string_field) {
const auto facet_field_map_it = facet_field_map.find(field_name);
if(facet_field_map_it == facet_field_map.end()) {
@ -36,6 +37,11 @@ void facet_index_t::insert(const std::string& field_name,std::unordered_map<face
if(fvalue.facet_id == UINT32_MAX) {
// float, int32 & bool will provide facet_id as their own numerical values
facet_id = (fvalue_index_it == fvalue_index.end()) ? ++next_facet_id : fvalue_index_it->second.facet_id;
if(!is_string_field) {
int64_t val = std::stoll(fvalue.facet_value);
index_to_int64_map[facet_id] = val;
}
}
real_facet_ids.push_back(facet_id);
@ -333,3 +339,13 @@ posting_list_t* facet_index_t::get_facet_hash_index(const std::string &field_nam
}
return nullptr;
}
// Returns the int64 value stored in index_to_int64_map under `index`.
// When no entry exists for `index`, INT64_MAX is returned instead.
int64_t facet_index_t::get_facet_val(uint32_t index) {
const auto entry = index_to_int64_map.find(index);
return (entry == index_to_int64_map.end()) ? INT64_MAX : entry->second;
}

View File

@ -781,7 +781,7 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
}
}
facet_index_v4->insert(afield.name, fvalue_to_seq_ids, seq_id_to_fvalues);
facet_index_v4->insert(afield.name, fvalue_to_seq_ids, seq_id_to_fvalues, afield.is_string());
auto tree_it = search_index.find(afield.faceted_name());
if(tree_it == search_index.end()) {
@ -1384,7 +1384,12 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
}
if(should_compute_stats) {
compute_facet_stats(a_facet, fhash, facet_field.type);
int64_t val = fhash;
if(facet_field.is_int64()) {
val = facet_index_v4->get_facet_val(fhash);
}
compute_facet_stats(a_facet, val, facet_field.type);
}
if(a_facet.is_range_query) {
@ -4860,9 +4865,7 @@ void Index::compute_facet_infos(const std::vector<facet>& facets, facet_query_t&
facet_infos[findex].should_compute_stats = (facet_field.type != field_types::STRING &&
facet_field.type != field_types::BOOL &&
facet_field.type != field_types::STRING_ARRAY &&
facet_field.type != field_types::BOOL_ARRAY &&
facet_field.type != field_types::INT64 &&
facet_field.type != field_types::INT64_ARRAY);
facet_field.type != field_types::BOOL_ARRAY);
size_t num_facet_values = facet_index_v4->get_facet_count(facet_field.name);
facet_infos[findex].use_value_index = (group_limit == 0) && (a_facet.sort_field.empty()) &&

View File

@ -288,7 +288,11 @@ TEST_F(CollectionFacetingTest, FacetCounts) {
ASSERT_STREQ("1421890022", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
ASSERT_STREQ("<mark>142189002</mark>2", results["facet_counts"][0]["counts"][0]["highlighted"].get<std::string>().c_str());
ASSERT_EQ(1, results["facet_counts"][0]["stats"].size());
ASSERT_EQ(5, results["facet_counts"][0]["stats"].size());
ASSERT_FLOAT_EQ(1106321222, results["facet_counts"][0]["stats"]["avg"].get<double>());
ASSERT_FLOAT_EQ(348974822, results["facet_counts"][0]["stats"]["min"].get<double>());
ASSERT_FLOAT_EQ(1453426022, results["facet_counts"][0]["stats"]["max"].get<double>());
ASSERT_FLOAT_EQ(13275854664, results["facet_counts"][0]["stats"]["sum"].get<double>());
ASSERT_FLOAT_EQ(1, results["facet_counts"][0]["stats"]["total_values"].get<size_t>());
// facet query that does not match any indexed value