Return the parent object for deeply nested facet fields, and enable it only when the flag is set

This commit is contained in:
krunal 2023-07-26 15:06:09 +05:30
parent 67fe400065
commit 0bc924daaa
3 changed files with 147 additions and 13 deletions

View File

@ -385,6 +385,8 @@ public:
bool facet_value_to_string(const facet &a_facet, const facet_count_t &facet_count, const nlohmann::json &document,
std::string &value) const;
// Resolves the ancestors of the dot-separated nested field `facet_field_name` inside `document`
// (the last path token, i.e. the facet field itself, is ignored) and returns the JSON dump of one of them:
// the deepest ancestor below the top-most one whose size() exceeds 1, or the top-most ancestor otherwise.
std::string get_facet_parent(const std::string& facet_field_name, const nlohmann::json& document) const;
static void populate_result_kvs(Topster *topster, std::vector<std::vector<KV *>> &result_kvs,
const spp::sparse_hash_map<uint64_t, uint32_t>& groups_processed,
const std::vector<sort_by>& sort_by_fields);
@ -474,7 +476,8 @@ public:
facet_index_type_t facet_index_type = HASH,
const size_t remote_embedding_timeout_ms = 30000,
const size_t remote_embedding_num_try = 2,
const std::string& stopwords_set="") const;
const std::string& stopwords_set="",
bool facet_return_parent = false) const;
Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;

View File

@ -1123,7 +1123,8 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
facet_index_type_t facet_index_type,
const size_t remote_embedding_timeout_ms,
const size_t remote_embedding_num_try,
const std::string& stopwords_set) const {
const std::string& stopwords_set,
bool facet_return_parent) const {
std::shared_lock lock(mutex);
@ -2090,9 +2091,8 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
}
}
if(the_field.nested) {
auto parent = the_field.name.substr(0, the_field.name.find("."));
value = document[parent].dump();
if(facet_return_parent && the_field.nested) {
value = get_facet_parent(the_field.name, document);
}
std::unordered_map<std::string, size_t> ftoken_pos;
@ -2806,6 +2806,36 @@ bool Collection::facet_value_to_string(const facet &a_facet, const facet_count_t
return true;
}
/**
 * Builds the serialized "parent" value for a nested facet field.
 *
 * `facet_field_name` is a dot-separated path (e.g. `a.b.c`) whose last token is the facet field itself.
 * The ancestors (`a`, `a.b`) are resolved inside `document`, and the JSON dump of one of them is returned:
 * the deepest ancestor below the top-most one whose size() exceeds 1, or the top-most ancestor
 * (`document[a]`) when no such level exists.
 *
 * @param facet_field_name dot-separated path of the nested facet field.
 * @param document         document in which the path is resolved.
 * @return JSON dump of the selected ancestor. An empty string is returned (and an error is logged) when
 *         the field name yields no tokens or the top-most ancestor is absent from `document`.
 */
std::string Collection::get_facet_parent(const std::string& facet_field_name, const nlohmann::json& document) const {
    std::vector<std::string> tokens;
    StringUtils::split(facet_field_name, tokens, ".");

    if(tokens.empty()) {
        LOG(ERROR) << "Could not extract a parent path from facet field `" << facet_field_name << "`";
        return "";
    }

    // The const operator[] of nlohmann::json must not be used with a missing key, so the
    // top-most ancestor is looked up with find(), which yields end() for absent keys and non-objects.
    const auto root_it = document.find(tokens[0]);
    if(root_it == document.end()) {
        LOG(ERROR) << tokens[0] << " not found in document";
        return "";
    }

    // Pointers into `document` are kept instead of copying one JSON sub-tree per level.
    std::vector<const nlohmann::json*> levels;
    levels.push_back(&(*root_it));

    // `i + 1 < size` skips the last token (the facet field itself) without unsigned underflow.
    for(size_t i = 1; i + 1 < tokens.size(); ++i) {
        const nlohmann::json* current = levels.back();
        const auto child_it = current->find(tokens[i]);

        if(child_it == current->end()) {
            LOG(ERROR) << tokens[i] << " not found in document";
            // Deeper tokens cannot be resolved once a level is missing: stop at the deepest valid ancestor.
            break;
        }

        levels.push_back(&(*child_it));
    }

    // Walk upwards from the deepest ancestor; index 0 is the fallback handled after the loop.
    for(size_t i = levels.size() - 1; i > 0; --i) {
        if(levels[i]->size() > 1) {
            return levels[i]->dump();
        }
    }

    // No intermediate level qualified: return the top-most ancestor.
    return levels.front()->dump();
}
bool Collection::is_nested_array(const nlohmann::json& obj, std::vector<std::string> path_parts, size_t part_i) const {
auto child_it = obj.find(path_parts[part_i]);
if(child_it == obj.end()) {

View File

@ -1630,7 +1630,7 @@ TEST_F(CollectionFacetingTest, FloatFieldValueTruncation) {
ASSERT_EQ("300", results["facet_counts"][0]["counts"][0]["value"].get<std::string>());
}
TEST_F(CollectionFacetingTest, FacetingObjectTest) {
TEST_F(CollectionFacetingTest, FacetingReturnParent) {
nlohmann::json schema = R"({
"name": "coll1",
"enable_nested_fields": true,
@ -1669,18 +1669,119 @@ TEST_F(CollectionFacetingTest, FacetingObjectTest) {
add_op = coll1->add(doc2.dump(), CREATE);
ASSERT_TRUE(add_op.ok());
auto search_op = coll1->search("*", {},
"", {"value.color"}, {}, {2}, 10, 1,
FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(),
10, "value.color : blue");
auto search_op = coll1->search("*", {},"", {"value.color"},
{}, {2}, 10, 1,FREQUENCY, {true},
1, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(),10, "",
30, 4, "",
Index::TYPO_TOKENS_THRESHOLD, "", "",{},
3, "<mark>", "</mark>", {},
UINT32_MAX, true, false, true,
"", false, 6000*1000, 4, 7,
fallback, 4, {off}, INT16_MAX, INT16_MAX,
2, 2, false, "",
true, 0, max_score, 100,
0, 0, HASH, 30000,
2, "",true);
if(!search_op.ok()) {
LOG(ERROR) << search_op.error();
FAIL();
}
auto results = search_op.get();
LOG (INFO) << results.dump();
ASSERT_EQ(1, results["facet_counts"].size());
ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
ASSERT_EQ("{\"b\":0,\"color\":\"red\",\"g\":0,\"r\":255}", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ("{\"b\":255,\"color\":\"blue\",\"g\":0,\"r\":0}", results["facet_counts"][0]["counts"][1]["value"]);
//not passing facet_return_parent will only return facet value, not immediate parent
search_op = coll1->search("*", {},"", {"value.color"},
{}, {2}, 10, 1,FREQUENCY, {true},
1, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(),10, "",
30, 4, "",
Index::TYPO_TOKENS_THRESHOLD, "", "",{},
3, "<mark>", "</mark>", {},
UINT32_MAX, true, false, true,
"", false, 6000*1000, 4, 7,
fallback, 4, {off}, INT16_MAX, INT16_MAX,
2, 2, false, "",
true, 0, max_score, 100,
0, 0, HASH, 30000,
2, "",false);
if(!search_op.ok()) {
LOG(ERROR) << search_op.error();
FAIL();
}
results = search_op.get();
ASSERT_EQ(1, results["facet_counts"].size());
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
ASSERT_EQ("red", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ("blue", results["facet_counts"][0]["counts"][1]["value"]);
}
// Verifies that faceting on a deeply nested field with facet_return_parent enabled reports the
// enclosing parent object as the facet value.
TEST_F(CollectionFacetingTest, FacetingReturnParentDeepNested) {
    // collection with a single facetable field nested four levels deep
    nlohmann::json coll_schema = R"({
"name": "coll1",
"enable_nested_fields": true,
"fields": [
{"name": "product.specification.detail.width", "type": "int32", "optional": false, "facet": true }
]
})"_json;

    auto create_op = collectionManager.create_collection(coll_schema);
    ASSERT_TRUE(create_op.ok());
    Collection* coll = create_op.get();

    nlohmann::json width_25_doc = R"({
"product" : {
"specification": {
"detail" : {
"width": 25
}
}
}
})"_json;

    nlohmann::json width_30_doc = R"({
"product" : {
"specification": {
"detail" : {
"width": 30
}
}
}
})"_json;

    // index both documents in the same order as they are declared
    for(const nlohmann::json* doc : {&width_25_doc, &width_30_doc}) {
        auto add_op = coll->add(doc->dump(), CREATE);
        ASSERT_TRUE(add_op.ok());
    }

    // the trailing `true` argument is facet_return_parent
    auto search_op = coll->search(
        "*", {}, "", {"product.specification.detail.width"},
        {}, {2}, 10, 1, FREQUENCY, {true},
        1, spp::sparse_hash_set<std::string>(),
        spp::sparse_hash_set<std::string>(), 10, "",
        30, 4, "",
        Index::TYPO_TOKENS_THRESHOLD, "", "", {},
        3, "<mark>", "</mark>", {},
        UINT32_MAX, true, false, true,
        "", false, 6000*1000, 4, 7,
        fallback, 4, {off}, INT16_MAX, INT16_MAX,
        2, 2, false, "",
        true, 0, max_score, 100,
        0, 0, HASH, 30000,
        2, "", true);

    if(!search_op.ok()) {
        LOG(ERROR) << search_op.error();
        FAIL();
    }

    auto results = search_op.get();
    ASSERT_EQ(1, results["facet_counts"].size());

    auto& facet_values = results["facet_counts"][0]["counts"];
    ASSERT_EQ(2, facet_values.size());

    // every level below `product` has a single member, so the top-most ancestor is returned
    ASSERT_EQ("{\"specification\":{\"detail\":{\"width\":30}}}", facet_values[0]["value"]);
    ASSERT_EQ("{\"specification\":{\"detail\":{\"width\":25}}}", facet_values[1]["value"]);
}