return parent with value based faceting

This commit is contained in:
krunal 2023-09-18 15:20:19 +05:30
parent 675a2fc402
commit 753584047b
5 changed files with 278 additions and 22 deletions

View File

@ -34,6 +34,11 @@ struct facet_value_id_t {
}
};
// Result entry produced per facet value by facet_index_t::intersect():
// pairs the number of hits for the value with one document id that carries it.
// Both members default to zero so that a default-constructed entry never
// exposes indeterminate values; brace initialisation `{doc_id, count}` still works.
struct docid_count_t {
    // A document id taken from the facet value's id list (intersect() stores
    // the first id); callers can use it to load a representative document.
    uint32_t doc_id = 0;
    // Number of documents counted for the facet value.
    uint32_t count = 0;
};
class facet_index_t {
private:
struct facet_count_t {
@ -129,7 +134,7 @@ public:
size_t intersect(facet& a_facet,
bool has_facet_query, const std::vector<std::string>& fvalue_searched_tokens,
const uint32_t* result_ids, size_t result_id_len,
size_t max_facet_count, std::map<std::string, uint32_t>& found,
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
bool is_wildcard_no_filter_query, const std::string& sort_order = "");
size_t get_facet_indexes(const std::string& field,

View File

@ -2336,28 +2336,28 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
auto & facet_count = facet_counts[fi];
std::string value;
const std::string& seq_id_key = get_seq_id_key((uint32_t) facet_count.doc_id);
nlohmann::json document;
const Option<bool> & document_op = get_document_from_store(seq_id_key, document);
if(!document_op.ok()) {
LOG(ERROR) << "Facet fetch error. " << document_op.error();
continue;
}
if(a_facet.is_intersected) {
value = facet_count.fvalue;
//LOG(INFO) << "used intersection";
} else {
// fetch actual facet value from representative doc id
//LOG(INFO) << "used hashes";
const std::string& seq_id_key = get_seq_id_key((uint32_t) facet_count.doc_id);
nlohmann::json document;
const Option<bool> & document_op = get_document_from_store(seq_id_key, document);
if(!document_op.ok()) {
LOG(ERROR) << "Facet fetch error. " << document_op.error();
continue;
}
bool facet_found = facet_value_to_string(a_facet, facet_count, document, value);
if(!facet_found) {
continue;
}
}
if(the_field.nested && should_return_parent) {
value = get_facet_parent(the_field.name, document);
}
if(the_field.nested && should_return_parent) {
value = get_facet_parent(the_field.name, document);
}
std::unordered_map<std::string, size_t> ftoken_pos;

View File

@ -150,7 +150,7 @@ size_t facet_index_t::get_facet_count(const std::string& field_name) {
size_t facet_index_t::intersect(facet& a_facet,
bool has_facet_query, const std::vector<std::string>& fvalue_searched_tokens,
const uint32_t* result_ids, size_t result_ids_len,
size_t max_facet_count, std::map<std::string, uint32_t>& found,
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
bool is_wildcard_no_filter_query, const std::string& sort_order) {
//LOG (INFO) << "intersecting field " << field;
@ -171,6 +171,7 @@ size_t facet_index_t::intersect(facet& a_facet,
auto intersect_fn = [&] (std::list<facet_count_t>::const_iterator facet_count_it) {
uint32_t count = 0;
uint32_t doc_id = 0;
if(has_facet_query) {
bool found_search_token = false;
auto facet_str = facet_count_it->facet_value;
@ -189,18 +190,20 @@ size_t facet_index_t::intersect(facet& a_facet,
}
}
auto ids = facet_index_map.at(facet_count_it->facet_value).seq_ids;
if (!ids) {
return;
}
if (is_wildcard_no_filter_query) {
count = facet_count_it->count;
} else {
auto ids = facet_index_map.at(facet_count_it->facet_value).seq_ids;
if (!ids) {
return;
}
count = ids_t::intersect_count(ids, result_ids, result_ids_len);
}
if (count) {
found[facet_count_it->facet_value] = count;
doc_id = ids_t::first_id(ids);
found[facet_count_it->facet_value] = {doc_id, count};
}
};

View File

@ -1298,7 +1298,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
// LOG(INFO) << "Using intersection to find facets";
a_facet.is_intersected = true;
std::map<std::string, uint32_t> facet_results;
std::map<std::string, docid_count_t> facet_results;
std::string sort_order = a_facet.is_sort_by_alpha ? a_facet.sort_order : "";
facet_index_v4->intersect(a_facet, use_facet_query,
@ -1314,16 +1314,17 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
if(a_facet.get_range(std::stoll(doc_val), range_pair)) {
const auto& range_id = range_pair.first;
facet_count_t& facet_count = a_facet.result_map[range_id];
facet_count.count = kv.second;
facet_count.count = kv.second.count;
}
} else {
facet_count_t& facet_count = a_facet.value_result_map[kv.first];
facet_count.count = kv.second;
facet_count.count = kv.second.count;
facet_count.doc_id = kv.second.doc_id;
}
if(should_compute_stats) {
//LOG(INFO) << "Computing facet stas for facet " << a_facet.field_name;
for(size_t i = 0; i < kv.second; ++i) {
for(size_t i = 0; i < kv.second.count; ++i) {
compute_facet_stats(a_facet, kv.first, facet_field.type);
}
}

View File

@ -1442,6 +1442,253 @@ TEST_F(CollectionOptimizedFacetingTest, StringLengthTest) {
ASSERT_TRUE(100 == longStr.size());
}
// Checks which facet "value" string is reported for nested fields: the
// JSON-serialised parent object for fields named in the last search() argument,
// and the plain facet value for fields that are not named there.
// NOTE(review): the last search() argument appears to be the facet_return_parent
// list and VALUE appears to select value-based faceting — confirm against the
// Collection::search signature.
TEST_F(CollectionOptimizedFacetingTest, FacetingReturnParent) {
// Four facetable leaf fields that all live under the same "value" object.
nlohmann::json schema = R"({
"name": "coll1",
"enable_nested_fields": true,
"fields": [
{"name": "value.color", "type": "string", "optional": false, "facet": true },
{"name": "value.r", "type": "int32", "optional": false, "facet": true },
{"name": "value.g", "type": "int32", "optional": false, "facet": true },
{"name": "value.b", "type": "int32", "optional": false, "facet": true }
]
})"_json;
auto op = collectionManager.create_collection(schema);
ASSERT_TRUE(op.ok());
Collection* coll1 = op.get();
// Two documents with distinct colors, so every facet below has two buckets.
nlohmann::json doc1 = R"({
"value": {
"color": "red",
"r": 255,
"g": 0,
"b": 0
}
})"_json;
nlohmann::json doc2 = R"({
"value": {
"color": "blue",
"r": 0,
"g": 0,
"b": 255
}
})"_json;
auto add_op = coll1->add(doc1.dump(), CREATE);
ASSERT_TRUE(add_op.ok());
add_op = coll1->add(doc2.dump(), CREATE);
ASSERT_TRUE(add_op.ok());
// Case 1: facet on value.color and also name value.color in the last argument.
auto search_op = coll1->search("*", {},"", {"value.color"},
{}, {2}, 10, 1,FREQUENCY, {true},
1, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(),10, "",
30, 4, "",
Index::TYPO_TOKENS_THRESHOLD, "", "",{},
3, "<mark>", "</mark>", {},
UINT32_MAX, true, false, true,
"", false, 6000*1000, 4, 7,
fallback, 4, {off}, INT16_MAX, INT16_MAX,
2, 2, false, "",
true, 0, max_score, 100,
0, 0, VALUE, 30000,
2, "", {"value.color"});
if(!search_op.ok()) {
LOG(ERROR) << search_op.error();
FAIL();
}
auto results = search_op.get();
ASSERT_EQ(1, results["facet_counts"].size());
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
// Each bucket value is the serialised content of the document's "value" object.
ASSERT_EQ("{\"b\":0,\"color\":\"red\",\"g\":0,\"r\":255}", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ("{\"b\":255,\"color\":\"blue\",\"g\":0,\"r\":0}", results["facet_counts"][0]["counts"][1]["value"]);
// Case 2: facet fields that are not listed in facet_return_parent yield only
// the facet value itself, not the parent object.
search_op = coll1->search("*", {},"", {"value.color"},
{}, {2}, 10, 1,FREQUENCY, {true},
1, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(),10, "",
30, 4, "",
Index::TYPO_TOKENS_THRESHOLD, "", "",{},
3, "<mark>", "</mark>", {},
UINT32_MAX, true, false, true,
"", false, 6000*1000, 4, 7,
fallback, 4, {off}, INT16_MAX, INT16_MAX,
2, 2, false, "",
true, 0, max_score, 100,
0, 0, VALUE, 30000,
2, "", {});
if(!search_op.ok()) {
LOG(ERROR) << search_op.error();
FAIL();
}
results = search_op.get();
ASSERT_EQ(1, results["facet_counts"].size());
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
ASSERT_EQ("red", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ("blue", results["facet_counts"][0]["counts"][1]["value"]);
// Case 3: facet on two fields but name only value.r in the last argument;
// the setting is expected to apply per field.
search_op = coll1->search("*", {},"", {"value.color", "value.r"},
{}, {2}, 10, 1,FREQUENCY, {true},
1, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(),10, "",
30, 4, "",
Index::TYPO_TOKENS_THRESHOLD, "", "",{},
3, "<mark>", "</mark>", {},
UINT32_MAX, true, false, true,
"", false, 6000*1000, 4, 7,
fallback, 4, {off}, INT16_MAX, INT16_MAX,
2, 2, false, "",
true, 0, max_score, 100,
0, 0, VALUE, 30000,
2, "", {"value.r"});
if(!search_op.ok()) {
LOG(ERROR) << search_op.error();
FAIL();
}
results = search_op.get();
ASSERT_EQ(2, results["facet_counts"].size());
// value.color was not named, so it keeps plain values.
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
ASSERT_EQ("red", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ("blue", results["facet_counts"][0]["counts"][1]["value"]);
// value.r was named, so its buckets carry the serialised parent object.
ASSERT_EQ(2, results["facet_counts"][1]["counts"].size());
ASSERT_EQ("{\"b\":0,\"color\":\"red\",\"g\":0,\"r\":255}", results["facet_counts"][1]["counts"][0]["value"]);
ASSERT_EQ("{\"b\":255,\"color\":\"blue\",\"g\":0,\"r\":0}", results["facet_counts"][1]["counts"][1]["value"]);
}
// Checks the reported facet value for a field nested several levels deep
// ("product.specification.detail.width") when that field is named in the last
// search() argument.
// NOTE(review): the last search() argument appears to be the facet_return_parent
// list — confirm against the Collection::search signature.
TEST_F(CollectionOptimizedFacetingTest, FacetingReturnParentDeepNested) {
nlohmann::json schema = R"({
"name": "coll1",
"enable_nested_fields": true,
"fields": [
{"name": "product.specification.detail.width", "type": "int32", "optional": false, "facet": true }
]
})"_json;
auto op = collectionManager.create_collection(schema);
ASSERT_TRUE(op.ok());
Collection* coll1 = op.get();
// Two documents that differ only in the innermost "width" value.
nlohmann::json doc1 = R"({
"product" : {
"specification": {
"detail" : {
"width": 25
}
}
}
})"_json;
nlohmann::json doc2 = R"({
"product" : {
"specification": {
"detail" : {
"width": 30
}
}
}
})"_json;
auto add_op = coll1->add(doc1.dump(), CREATE);
ASSERT_TRUE(add_op.ok());
add_op = coll1->add(doc2.dump(), CREATE);
ASSERT_TRUE(add_op.ok());
auto search_op = coll1->search("*", {},"", {"product.specification.detail.width"},
{}, {2}, 10, 1,FREQUENCY, {true},
1, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(),10, "",
30, 4, "",
Index::TYPO_TOKENS_THRESHOLD, "", "",{},
3, "<mark>", "</mark>", {},
UINT32_MAX, true, false, true,
"", false, 6000*1000, 4, 7,
fallback, 4, {off}, INT16_MAX, INT16_MAX,
2, 2, false, "",
true, 0, max_score, 100,
0, 0, VALUE, 30000,
2, "", {"product.specification.detail.width"});
if(!search_op.ok()) {
LOG(ERROR) << search_op.error();
FAIL();
}
auto results = search_op.get();
ASSERT_EQ(1, results["facet_counts"].size());
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
// The expected strings are the content of the top-level "product" object
// (starting at "specification"), not just the innermost "detail" object.
// The width 30 bucket is asserted before the width 25 bucket.
ASSERT_EQ("{\"specification\":{\"detail\":{\"width\":30}}}", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ("{\"specification\":{\"detail\":{\"width\":25}}}", results["facet_counts"][0]["counts"][1]["value"]);
}
// Same scenario as faceting on "value.color" with parent return, except that
// the schema declares the whole "value" field as a facetable object instead of
// listing its leaf fields individually.
// NOTE(review): the last search() argument appears to be the facet_return_parent
// list — confirm against the Collection::search signature.
TEST_F(CollectionOptimizedFacetingTest, FacetingReturnParentObject) {
// Only the parent object is declared; "value.color" is not listed explicitly.
nlohmann::json schema = R"({
"name": "coll1",
"enable_nested_fields": true,
"fields": [
{"name": "value", "type": "object", "optional": false, "facet": true }
]
})"_json;
auto op = collectionManager.create_collection(schema);
ASSERT_TRUE(op.ok());
Collection* coll1 = op.get();
nlohmann::json doc1 = R"({
"value": {
"color": "red",
"r": 255,
"g": 0,
"b": 0
}
})"_json;
nlohmann::json doc2 = R"({
"value": {
"color": "blue",
"r": 0,
"g": 0,
"b": 255
}
})"_json;
auto add_op = coll1->add(doc1.dump(), CREATE);
ASSERT_TRUE(add_op.ok());
add_op = coll1->add(doc2.dump(), CREATE);
ASSERT_TRUE(add_op.ok());
// Facet on the nested leaf "value.color" and name it in the last argument.
auto search_op = coll1->search("*", {},"", {"value.color"},
{}, {2}, 10, 1,FREQUENCY, {true},
1, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(),10, "",
30, 4, "",
Index::TYPO_TOKENS_THRESHOLD, "", "",{},
3, "<mark>", "</mark>", {},
UINT32_MAX, true, false, true,
"", false, 6000*1000, 4, 7,
fallback, 4, {off}, INT16_MAX, INT16_MAX,
2, 2, false, "",
true, 0, max_score, 100,
0, 0, VALUE, 30000,
2, "", {"value.color"});
if(!search_op.ok()) {
LOG(ERROR) << search_op.error();
FAIL();
}
auto results = search_op.get();
ASSERT_EQ(1, results["facet_counts"].size());
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
// Each bucket value is the serialised content of the document's "value" object.
ASSERT_EQ("{\"b\":0,\"color\":\"red\",\"g\":0,\"r\":255}", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ("{\"b\":255,\"color\":\"blue\",\"g\":0,\"r\":0}", results["facet_counts"][0]["counts"][1]["value"]);
}
TEST_F(CollectionOptimizedFacetingTest, FacetSortByAlpha) {
nlohmann::json schema = R"({
"name": "coll1",