diff --git a/src/collection.cpp b/src/collection.cpp index 2f9fac7b..92843150 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -1958,10 +1958,12 @@ Option Collection::search(std::string raw_query, auto the_field = search_schema.at(a_facet.field_name); // keep only top K facets auto max_facets = std::min(max_facet_values, facet_hash_counts.size()); - - std::nth_element(facet_hash_counts.begin(), facet_hash_counts.begin() + max_facets, - facet_hash_counts.end(), Collection::facet_count_compare); + auto nthElement = max_facets == facet_hash_counts.size() ? max_facets - 1 : max_facets; + + std::nth_element(facet_hash_counts.begin(), facet_hash_counts.begin() + nthElement, + facet_hash_counts.end(), Collection::facet_count_compare); + for(size_t fi = 0; fi < max_facets; fi++) { // remap facet value hash with actual string auto & kv = facet_hash_counts[fi]; @@ -2672,13 +2674,9 @@ bool Collection::facet_value_to_string(const facet &a_facet, const facet_count_t } else if(search_schema.at(a_facet.field_name).type == field_types::FLOAT) { float raw_val = document[a_facet.field_name].get(); value = StringUtils::float_to_str(raw_val); - if(value != "0") { - value.erase ( value.find_last_not_of('0') + 1, std::string::npos ); // remove trailing zeros - } } else if(search_schema.at(a_facet.field_name).type == field_types::FLOAT_ARRAY) { float raw_val = document[a_facet.field_name][facet_count.array_pos].get(); value = StringUtils::float_to_str(raw_val); - value.erase ( value.find_last_not_of('0') + 1, std::string::npos ); // remove trailing zeros } else if(search_schema.at(a_facet.field_name).type == field_types::BOOL) { value = std::to_string(document[a_facet.field_name].get()); value = (value == "1") ? "true" : "false"; diff --git a/src/index.cpp b/src/index.cpp index 9eccc0f7..39a278ae 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -660,9 +660,8 @@ void Index::index_field_in_memory(const field& afield, std::vector for(size_t i = 0; i < document[afield.name].size(); ++i) { if(afield.type == field_types::INT32_ARRAY) { int32_t raw_val = document[afield.name][i].get(); - value = std::to_string(raw_val); - auto index = facet_index_v4->insert(afield.name, value, seq_id); - fhashvalues.hashes.emplace_back(index); + uint32_t hash = reinterpret_cast(raw_val); + fhashvalues.hashes.emplace_back(hash); } else if(afield.type == field_types::INT64_ARRAY) { int64_t raw_val = document[afield.name][i].get(); value = std::to_string(raw_val); @@ -674,13 +673,12 @@ void Index::index_field_in_memory(const field& afield, std::vector fhashvalues.hashes.emplace_back(index); } else if(afield.type == field_types::FLOAT_ARRAY) { float raw_val = document[afield.name][i].get(); - value = StringUtils::float_to_str(raw_val); - auto index = facet_index_v4->insert(afield.name, value, seq_id); - fhashvalues.hashes.emplace_back(index); + uint32_t hash = reinterpret_cast(raw_val); + fhashvalues.hashes.emplace_back(hash); } else if(afield.type == field_types::BOOL_ARRAY) { - value = std::to_string(document[afield.name][i].get()); - auto index = facet_index_v4->insert(afield.name, value, seq_id); - fhashvalues.hashes.emplace_back(index); + bool raw_val = document[afield.name][i].get(); + uint32_t hash = reinterpret_cast(raw_val); + fhashvalues.hashes.emplace_back(hash); } } fhashvalues.length = fhashvalues.hashes.size(); @@ -696,8 +694,7 @@ void Index::index_field_in_memory(const field& afield, std::vector if(afield.type == field_types::INT32) { int32_t raw_val = document[afield.name].get(); - value = std::to_string(raw_val); - fhash = facet_index_v4->insert(afield.name, value, seq_id); + fhash = reinterpret_cast(raw_val); } else if(afield.type == field_types::INT64) { int64_t raw_val = document[afield.name].get(); @@ -710,12 +707,11 @@ void Index::index_field_in_memory(const field& afield, std::vector } else if(afield.type == field_types::FLOAT) { float raw_val = document[afield.name].get(); - value = StringUtils::float_to_str(raw_val); - fhash = facet_index_v4->insert(afield.name, value, seq_id); + fhash = reinterpret_cast(raw_val); } else if(afield.type == field_types::BOOL) { - value = std::to_string(document[afield.name].get()); - fhash = facet_index_v4->insert(afield.name, value, seq_id); + bool raw_val = document[afield.name].get(); + fhash = reinterpret_cast(raw_val); } auto& facet_dim_index = single_val_facet_index_v3[afield.name][seq_id % ARRAY_FACET_DIM]; @@ -1139,7 +1135,7 @@ void Index::compute_facet_stats(facet &a_facet, const std::string& raw_value, co } } -void Index::compute_facet_stats(facet &a_facet, const int64_t raw_value, const std::string & field_type) { +void Index::compute_facet_stats(facet &a_facet, int64_t raw_value, const std::string & field_type) { if(field_type == field_types::INT32 || field_type == field_types::INT32_ARRAY) { int32_t val = raw_value; if (val < a_facet.stats.fvmin) { @@ -1161,7 +1157,7 @@ void Index::compute_facet_stats(facet &a_facet, const int64_t raw_value, const s a_facet.stats.fvsum += val; a_facet.stats.fvcount++; } else if(field_type == field_types::FLOAT || field_type == field_types::FLOAT_ARRAY) { - float val = int64_t_to_float(raw_value); + float val = reinterpret_cast(raw_value); if(val < a_facet.stats.fvmin) { a_facet.stats.fvmin = val; } @@ -1298,7 +1294,6 @@ void Index::do_facets(std::vector & facets, facet_query_t & facet_query, continue; } - int64_t doc_val = INT64_MAX; auto sort_index_it = sort_index.find(a_facet.field_name); for(size_t i = 0; i < results_size; i++) { @@ -1340,17 +1335,11 @@ void Index::do_facets(std::vector & facets, facet_query_t & facet_query, fhash = facet_map_it->second.hashes[j]; } if(should_compute_stats) { - doc_val = get_doc_val_from_sort_index(sort_index_it, doc_seq_id); - - if(doc_val != INT64_MAX) { - compute_facet_stats(a_facet, doc_val, facet_field.type); - } + compute_facet_stats(a_facet, fhash, facet_field.type); } if(a_facet.is_range_query) { - if(doc_val == INT64_MAX) { - doc_val = get_doc_val_from_sort_index(sort_index_it, doc_seq_id); - } + int64_t doc_val = get_doc_val_from_sort_index(sort_index_it, doc_seq_id); std::pair range_pair {}; if(a_facet.get_range(std::to_string(doc_val), range_pair)) { @@ -4903,7 +4892,9 @@ void Index::compute_facet_infos(const std::vector& facets, facet_query_t& facet_infos[findex].should_compute_stats = (facet_field.type != field_types::STRING && facet_field.type != field_types::BOOL && facet_field.type != field_types::STRING_ARRAY && - facet_field.type != field_types::BOOL_ARRAY); + facet_field.type != field_types::BOOL_ARRAY && + facet_field.type != field_types::INT64 && + facet_field.type != field_types::INT64_ARRAY); if(a_facet.field_name == facet_query.field_name && !facet_query.query.empty()) { facet_infos[findex].use_facet_query = true; diff --git a/test/collection_faceting_test.cpp b/test/collection_faceting_test.cpp index 2f107d6a..039a3be2 100644 --- a/test/collection_faceting_test.cpp +++ b/test/collection_faceting_test.cpp @@ -231,13 +231,12 @@ TEST_F(CollectionFacetingTest, FacetCounts) { ASSERT_STREQ("age", results["facet_counts"][0]["field_name"].get().c_str()); ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][0]["count"]); - ASSERT_STREQ("21", results["facet_counts"][0]["counts"][0]["value"].get().c_str()); - ASSERT_STREQ("21", results["facet_counts"][0]["counts"][0]["highlighted"].get().c_str()); + ASSERT_STREQ("24", results["facet_counts"][0]["counts"][0]["value"].get().c_str()); + ASSERT_STREQ("24", results["facet_counts"][0]["counts"][0]["highlighted"].get().c_str()); ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][1]["count"]); - ASSERT_STREQ("24", results["facet_counts"][0]["counts"][1]["value"].get().c_str()); - ASSERT_STREQ("24", results["facet_counts"][0]["counts"][1]["highlighted"].get().c_str()); - + ASSERT_STREQ("21", results["facet_counts"][0]["counts"][1]["value"].get().c_str()); + ASSERT_STREQ("21", results["facet_counts"][0]["counts"][1]["highlighted"].get().c_str()); // facet on a float field without query to check on stats results = coll_array_fields->search("*", query_fields, "", {"rating"}, sort_fields, {0}, 10, 1, FREQUENCY, @@ -289,11 +288,6 @@ TEST_F(CollectionFacetingTest, FacetCounts) { ASSERT_STREQ("1421890022", results["facet_counts"][0]["counts"][0]["value"].get().c_str()); ASSERT_STREQ("1421890022", results["facet_counts"][0]["counts"][0]["highlighted"].get().c_str()); - ASSERT_EQ(5, results["facet_counts"][0]["stats"].size()); - ASSERT_FLOAT_EQ(348974822.0, results["facet_counts"][0]["stats"]["min"].get()); - ASSERT_FLOAT_EQ(1453426022.0, results["facet_counts"][0]["stats"]["max"].get()); - ASSERT_FLOAT_EQ(13275854664.0, results["facet_counts"][0]["stats"]["sum"].get()); - ASSERT_FLOAT_EQ(1106321222.0, results["facet_counts"][0]["stats"]["avg"].get()); ASSERT_FLOAT_EQ(1, results["facet_counts"][0]["stats"]["total_values"].get()); // facet query that does not match any indexed value @@ -958,10 +952,11 @@ TEST_F(CollectionFacetingTest, FacetValuesShouldBeNormalized) { ASSERT_EQ(3, results["hits"].size()); ASSERT_EQ(1, results["facet_counts"].size()); - ASSERT_EQ(1, results["facet_counts"][0]["counts"].size()); + ASSERT_EQ(3, results["facet_counts"][0]["counts"].size()); - // any document is chosen as representative - ASSERT_EQ("bu-qu", results["facet_counts"][0]["counts"][0]["value"].get()); + ASSERT_EQ("Buqu", results["facet_counts"][0]["counts"][0]["value"].get()); + ASSERT_EQ("BUQU", results["facet_counts"][0]["counts"][1]["value"].get()); + ASSERT_EQ("bu-qu", results["facet_counts"][0]["counts"][2]["value"].get()); collectionManager.drop_collection("coll1"); } @@ -992,10 +987,11 @@ TEST_F(CollectionFacetingTest, FacetArrayValuesShouldBeNormalized) { ASSERT_EQ(1, results["hits"].size()); ASSERT_EQ(1, results["facet_counts"].size()); - ASSERT_EQ(1, results["facet_counts"][0]["counts"].size()); + ASSERT_EQ(3, results["facet_counts"][0]["counts"].size()); - // any document is chosen as representative - ASSERT_EQ("bu-qu", results["facet_counts"][0]["counts"][0]["value"].get()); + ASSERT_EQ("Buqu", results["facet_counts"][0]["counts"][0]["value"].get()); + ASSERT_EQ("BUQU", results["facet_counts"][0]["counts"][1]["value"].get()); + ASSERT_EQ("bu-qu", results["facet_counts"][0]["counts"][2]["value"].get()); collectionManager.drop_collection("coll1"); } diff --git a/test/collection_grouping_test.cpp b/test/collection_grouping_test.cpp index f88d9551..527e7b17 100644 --- a/test/collection_grouping_test.cpp +++ b/test/collection_grouping_test.cpp @@ -452,10 +452,10 @@ TEST_F(CollectionGroupingTest, GroupingWithArrayFieldAndOverride) { ASSERT_STREQ("brand", res["facet_counts"][0]["field_name"].get().c_str()); ASSERT_EQ(2, (int) res["facet_counts"][0]["counts"][0]["count"]); - ASSERT_STREQ("Xorp", res["facet_counts"][0]["counts"][0]["value"].get().c_str()); + ASSERT_STREQ("Beta", res["facet_counts"][0]["counts"][0]["value"].get().c_str()); ASSERT_EQ(2, (int) res["facet_counts"][0]["counts"][1]["count"]); - ASSERT_STREQ("Beta", res["facet_counts"][0]["counts"][1]["value"].get().c_str()); + ASSERT_STREQ("Xorp", res["facet_counts"][0]["counts"][1]["value"].get().c_str()); ASSERT_EQ(2, (int) res["facet_counts"][0]["counts"][2]["count"]); ASSERT_STREQ("Omega", res["facet_counts"][0]["counts"][2]["value"].get().c_str());