From a10cf167caa968faf19c050a320ce211ded25ffb Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Wed, 21 Dec 2022 14:39:06 +0530 Subject: [PATCH] Fix edge case for facet counts with empty strings in array. --- src/collection.cpp | 2 +- src/index.cpp | 8 +++----- test/collection_faceting_test.cpp | 33 +++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/collection.cpp b/src/collection.cpp index d9a35b81..221a8017 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -1658,7 +1658,7 @@ Option Collection::search(const std::string & raw_query, facet_result["field_name"] = a_facet.field_name; facet_result["counts"] = nlohmann::json::array(); - std::vector> facet_hash_counts; + std::vector> facet_hash_counts; for (const auto & kv : a_facet.result_map) { facet_hash_counts.emplace_back(kv); } diff --git a/src/index.cpp b/src/index.cpp index 99ae695d..52d56f94 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -1170,16 +1170,14 @@ void Index::tokenize_string_array_with_facets(const std::vector& st } } - //LOG(INFO) << "Str: " << str << ", last_token: " << last_token; + if(is_facet) { + facet_hashes.push_back(facet_hash); + } if(token_set.empty()) { continue; } - if(is_facet) { - facet_hashes.push_back(facet_hash); - } - for(auto& the_token: token_set) { // repeat last element to indicate end of offsets for this array index token_to_offsets[the_token].push_back(token_to_offsets[the_token].back()); diff --git a/test/collection_faceting_test.cpp b/test/collection_faceting_test.cpp index d5b07c1c..497eb588 100644 --- a/test/collection_faceting_test.cpp +++ b/test/collection_faceting_test.cpp @@ -979,3 +979,36 @@ TEST_F(CollectionFacetingTest, FacetByNestedIntField) { ASSERT_EQ(2, results["facet_counts"][0]["counts"][0]["count"].get()); ASSERT_EQ("2000", results["facet_counts"][0]["counts"][0]["value"].get()); } + +TEST_F(CollectionFacetingTest, FacetOnArrayFieldWithSpecialChars) { + std::vector fields = { + field("tags", field_types::STRING_ARRAY, true), + field("points", field_types::INT32, true), + }; + + Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get(); + + nlohmann::json doc; + doc["tags"] = {"gamma"}; + doc["points"] = 10; + ASSERT_TRUE(coll1->add(doc.dump()).ok()); + + doc["tags"] = {"alpha", "| . |", "beta", "gamma"}; + doc["points"] = 10; + ASSERT_TRUE(coll1->add(doc.dump()).ok()); + + auto results = coll1->search("*", {}, + "", {"tags"}, {}, {2}, 10, 1, FREQUENCY, {true}, 1).get(); + + ASSERT_EQ(1, results["facet_counts"].size()); + ASSERT_EQ(4, results["facet_counts"][0]["counts"].size()); + + for(size_t i = 0; i < results["facet_counts"][0]["counts"].size(); i++) { + auto fvalue = results["facet_counts"][0]["counts"][i]["value"].get(); + if(fvalue == "gamma") { + ASSERT_EQ(2, results["facet_counts"][0]["counts"][i]["count"].get()); + } else { + ASSERT_EQ(1, results["facet_counts"][0]["counts"][i]["count"].get()); + } + } +}