mirror of
https://github.com/typesense/typesense.git
synced 2025-05-21 22:33:27 +08:00
return parent with value based faceting
This commit is contained in:
parent
675a2fc402
commit
753584047b
@ -34,6 +34,11 @@ struct facet_value_id_t {
|
||||
}
|
||||
};
|
||||
|
||||
// Pairs a facet value's hit count with one document id taken from the
// value's id list, so that callers can later load that document (for
// example to resolve the facet's parent object).
//
// Both members are zero-initialized by default so that a default-constructed
// instance never carries indeterminate values. The type remains an aggregate
// (C++14 and later), so `{doc_id, count}` brace-initialization still works.
struct docid_count_t {
    // Document id recorded alongside the count.
    uint32_t doc_id = 0;
    // Number of matching documents for the facet value.
    uint32_t count = 0;
};
|
||||
|
||||
class facet_index_t {
|
||||
private:
|
||||
struct facet_count_t {
|
||||
@ -129,7 +134,7 @@ public:
|
||||
size_t intersect(facet& a_facet,
|
||||
bool has_facet_query, const std::vector<std::string>& fvalue_searched_tokens,
|
||||
const uint32_t* result_ids, size_t result_id_len,
|
||||
size_t max_facet_count, std::map<std::string, uint32_t>& found,
|
||||
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
|
||||
bool is_wildcard_no_filter_query, const std::string& sort_order = "");
|
||||
|
||||
size_t get_facet_indexes(const std::string& field,
|
||||
|
@ -2336,28 +2336,28 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
|
||||
auto & facet_count = facet_counts[fi];
|
||||
std::string value;
|
||||
|
||||
const std::string& seq_id_key = get_seq_id_key((uint32_t) facet_count.doc_id);
|
||||
nlohmann::json document;
|
||||
const Option<bool> & document_op = get_document_from_store(seq_id_key, document);
|
||||
if(!document_op.ok()) {
|
||||
LOG(ERROR) << "Facet fetch error. " << document_op.error();
|
||||
continue;
|
||||
}
|
||||
|
||||
if(a_facet.is_intersected) {
|
||||
value = facet_count.fvalue;
|
||||
//LOG(INFO) << "used intersection";
|
||||
} else {
|
||||
// fetch actual facet value from representative doc id
|
||||
//LOG(INFO) << "used hashes";
|
||||
const std::string& seq_id_key = get_seq_id_key((uint32_t) facet_count.doc_id);
|
||||
nlohmann::json document;
|
||||
const Option<bool> & document_op = get_document_from_store(seq_id_key, document);
|
||||
if(!document_op.ok()) {
|
||||
LOG(ERROR) << "Facet fetch error. " << document_op.error();
|
||||
continue;
|
||||
}
|
||||
|
||||
bool facet_found = facet_value_to_string(a_facet, facet_count, document, value);
|
||||
if(!facet_found) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if(the_field.nested && should_return_parent) {
|
||||
value = get_facet_parent(the_field.name, document);
|
||||
}
|
||||
if(the_field.nested && should_return_parent) {
|
||||
value = get_facet_parent(the_field.name, document);
|
||||
}
|
||||
|
||||
std::unordered_map<std::string, size_t> ftoken_pos;
|
||||
|
@ -150,7 +150,7 @@ size_t facet_index_t::get_facet_count(const std::string& field_name) {
|
||||
size_t facet_index_t::intersect(facet& a_facet,
|
||||
bool has_facet_query, const std::vector<std::string>& fvalue_searched_tokens,
|
||||
const uint32_t* result_ids, size_t result_ids_len,
|
||||
size_t max_facet_count, std::map<std::string, uint32_t>& found,
|
||||
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
|
||||
bool is_wildcard_no_filter_query, const std::string& sort_order) {
|
||||
//LOG (INFO) << "intersecting field " << field;
|
||||
|
||||
@ -171,6 +171,7 @@ size_t facet_index_t::intersect(facet& a_facet,
|
||||
|
||||
auto intersect_fn = [&] (std::list<facet_count_t>::const_iterator facet_count_it) {
|
||||
uint32_t count = 0;
|
||||
uint32_t doc_id = 0;
|
||||
if(has_facet_query) {
|
||||
bool found_search_token = false;
|
||||
auto facet_str = facet_count_it->facet_value;
|
||||
@ -189,18 +190,20 @@ size_t facet_index_t::intersect(facet& a_facet,
|
||||
}
|
||||
}
|
||||
|
||||
auto ids = facet_index_map.at(facet_count_it->facet_value).seq_ids;
|
||||
if (!ids) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_wildcard_no_filter_query) {
|
||||
count = facet_count_it->count;
|
||||
} else {
|
||||
auto ids = facet_index_map.at(facet_count_it->facet_value).seq_ids;
|
||||
if (!ids) {
|
||||
return;
|
||||
}
|
||||
count = ids_t::intersect_count(ids, result_ids, result_ids_len);
|
||||
}
|
||||
|
||||
if (count) {
|
||||
found[facet_count_it->facet_value] = count;
|
||||
doc_id = ids_t::first_id(ids);
|
||||
found[facet_count_it->facet_value] = {doc_id, count};
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1298,7 +1298,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
|
||||
// LOG(INFO) << "Using intersection to find facets";
|
||||
a_facet.is_intersected = true;
|
||||
|
||||
std::map<std::string, uint32_t> facet_results;
|
||||
std::map<std::string, docid_count_t> facet_results;
|
||||
std::string sort_order = a_facet.is_sort_by_alpha ? a_facet.sort_order : "";
|
||||
|
||||
facet_index_v4->intersect(a_facet, use_facet_query,
|
||||
@ -1314,16 +1314,17 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
|
||||
if(a_facet.get_range(std::stoll(doc_val), range_pair)) {
|
||||
const auto& range_id = range_pair.first;
|
||||
facet_count_t& facet_count = a_facet.result_map[range_id];
|
||||
facet_count.count = kv.second;
|
||||
facet_count.count = kv.second.count;
|
||||
}
|
||||
} else {
|
||||
facet_count_t& facet_count = a_facet.value_result_map[kv.first];
|
||||
facet_count.count = kv.second;
|
||||
facet_count.count = kv.second.count;
|
||||
facet_count.doc_id = kv.second.doc_id;
|
||||
}
|
||||
|
||||
if(should_compute_stats) {
|
||||
//LOG(INFO) << "Computing facet stas for facet " << a_facet.field_name;
|
||||
for(size_t i = 0; i < kv.second; ++i) {
|
||||
for(size_t i = 0; i < kv.second.count; ++i) {
|
||||
compute_facet_stats(a_facet, kv.first, facet_field.type);
|
||||
}
|
||||
}
|
||||
|
@ -1442,6 +1442,253 @@ TEST_F(CollectionOptimizedFacetingTest, StringLengthTest) {
|
||||
ASSERT_TRUE(100 == longStr.size());
|
||||
}
|
||||
|
||||
// Verifies that value-based faceting on nested fields returns the immediate
// parent object for fields listed in the final `search` argument, and the
// plain facet value for fields that are not listed there.
TEST_F(CollectionOptimizedFacetingTest, FacetingReturnParent) {
    nlohmann::json schema = R"({
        "name": "coll1",
        "enable_nested_fields": true,
        "fields": [
          {"name": "value.color", "type": "string", "optional": false, "facet": true },
          {"name": "value.r", "type": "int32", "optional": false, "facet": true },
          {"name": "value.g", "type": "int32", "optional": false, "facet": true },
          {"name": "value.b", "type": "int32", "optional": false, "facet": true }
        ]
    })"_json;

    auto create_op = collectionManager.create_collection(schema);
    ASSERT_TRUE(create_op.ok());
    Collection* coll1 = create_op.get();

    nlohmann::json red_doc = R"({
        "value": {
            "color": "red",
            "r": 255,
            "g": 0,
            "b": 0
        }
    })"_json;

    nlohmann::json blue_doc = R"({
        "value": {
            "color": "blue",
            "r": 0,
            "g": 0,
            "b": 255
        }
    })"_json;

    // Index both documents, in the same order as before: red first, then blue.
    for(const nlohmann::json* doc : {&red_doc, &blue_doc}) {
        auto add_op = coll1->add(doc->dump(), CREATE);
        ASSERT_TRUE(add_op.ok());
    }

    // Case 1: the faceted field is also requested in the return-parent list.
    auto parent_search = coll1->search("*", {}, "", {"value.color"},
                                       {}, {2}, 10, 1, FREQUENCY, {true},
                                       1, spp::sparse_hash_set<std::string>(),
                                       spp::sparse_hash_set<std::string>(), 10, "",
                                       30, 4, "",
                                       Index::TYPO_TOKENS_THRESHOLD, "", "", {},
                                       3, "<mark>", "</mark>", {},
                                       UINT32_MAX, true, false, true,
                                       "", false, 6000*1000, 4, 7,
                                       fallback, 4, {off}, INT16_MAX, INT16_MAX,
                                       2, 2, false, "",
                                       true, 0, max_score, 100,
                                       0, 0, VALUE, 30000,
                                       2, "", {"value.color"});

    if(!parent_search.ok()) {
        LOG(ERROR) << parent_search.error();
        FAIL();
    }

    auto parent_results = parent_search.get();
    ASSERT_EQ(1, parent_results["facet_counts"].size());
    auto& parent_counts = parent_results["facet_counts"][0]["counts"];
    ASSERT_EQ(2, parent_counts.size());
    ASSERT_EQ("{\"b\":0,\"color\":\"red\",\"g\":0,\"r\":255}", parent_counts[0]["value"]);
    ASSERT_EQ("{\"b\":255,\"color\":\"blue\",\"g\":0,\"r\":0}", parent_counts[1]["value"]);

    // Case 2: with an empty return-parent list only the raw facet value is
    // returned, not the immediate parent of the field.
    auto plain_search = coll1->search("*", {}, "", {"value.color"},
                                      {}, {2}, 10, 1, FREQUENCY, {true},
                                      1, spp::sparse_hash_set<std::string>(),
                                      spp::sparse_hash_set<std::string>(), 10, "",
                                      30, 4, "",
                                      Index::TYPO_TOKENS_THRESHOLD, "", "", {},
                                      3, "<mark>", "</mark>", {},
                                      UINT32_MAX, true, false, true,
                                      "", false, 6000*1000, 4, 7,
                                      fallback, 4, {off}, INT16_MAX, INT16_MAX,
                                      2, 2, false, "",
                                      true, 0, max_score, 100,
                                      0, 0, VALUE, 30000,
                                      2, "", {});

    if(!plain_search.ok()) {
        LOG(ERROR) << plain_search.error();
        FAIL();
    }

    auto plain_results = plain_search.get();
    ASSERT_EQ(1, plain_results["facet_counts"].size());
    auto& plain_counts = plain_results["facet_counts"][0]["counts"];
    ASSERT_EQ(2, plain_counts.size());
    ASSERT_EQ("red", plain_counts[0]["value"]);
    ASSERT_EQ("blue", plain_counts[1]["value"]);

    // Case 3: two faceted fields, only `value.r` is in the return-parent list.
    auto mixed_search = coll1->search("*", {}, "", {"value.color", "value.r"},
                                      {}, {2}, 10, 1, FREQUENCY, {true},
                                      1, spp::sparse_hash_set<std::string>(),
                                      spp::sparse_hash_set<std::string>(), 10, "",
                                      30, 4, "",
                                      Index::TYPO_TOKENS_THRESHOLD, "", "", {},
                                      3, "<mark>", "</mark>", {},
                                      UINT32_MAX, true, false, true,
                                      "", false, 6000*1000, 4, 7,
                                      fallback, 4, {off}, INT16_MAX, INT16_MAX,
                                      2, 2, false, "",
                                      true, 0, max_score, 100,
                                      0, 0, VALUE, 30000,
                                      2, "", {"value.r"});

    if(!mixed_search.ok()) {
        LOG(ERROR) << mixed_search.error();
        FAIL();
    }

    auto mixed_results = mixed_search.get();
    ASSERT_EQ(2, mixed_results["facet_counts"].size());

    // `value.color` was not requested with its parent: plain values expected.
    auto& color_counts = mixed_results["facet_counts"][0]["counts"];
    ASSERT_EQ(2, color_counts.size());
    ASSERT_EQ("red", color_counts[0]["value"]);
    ASSERT_EQ("blue", color_counts[1]["value"]);

    // `value.r` was requested with its parent: serialized parent objects expected.
    auto& r_counts = mixed_results["facet_counts"][1]["counts"];
    ASSERT_EQ(2, r_counts.size());
    ASSERT_EQ("{\"b\":0,\"color\":\"red\",\"g\":0,\"r\":255}", r_counts[0]["value"]);
    ASSERT_EQ("{\"b\":255,\"color\":\"blue\",\"g\":0,\"r\":0}", r_counts[1]["value"]);
}
|
||||
|
||||
// Verifies return-parent faceting on a field nested several levels deep: the
// asserted facet value is the serialized `specification` object that wraps
// the `detail.width` leaf.
TEST_F(CollectionOptimizedFacetingTest, FacetingReturnParentDeepNested) {
    nlohmann::json schema = R"({
        "name": "coll1",
        "enable_nested_fields": true,
        "fields": [
          {"name": "product.specification.detail.width", "type": "int32", "optional": false, "facet": true }
        ]
    })"_json;

    auto create_op = collectionManager.create_collection(schema);
    ASSERT_TRUE(create_op.ok());
    Collection* coll1 = create_op.get();

    nlohmann::json narrow_doc = R"({
        "product" : {
            "specification": {
                "detail" : {
                    "width": 25
                }
            }
        }
    })"_json;

    nlohmann::json wide_doc = R"({
        "product" : {
            "specification": {
                "detail" : {
                    "width": 30
                }
            }
        }
    })"_json;

    // Index the width-25 document first, then the width-30 document.
    for(const nlohmann::json* doc : {&narrow_doc, &wide_doc}) {
        auto add_op = coll1->add(doc->dump(), CREATE);
        ASSERT_TRUE(add_op.ok());
    }

    auto facet_search = coll1->search("*", {}, "", {"product.specification.detail.width"},
                                      {}, {2}, 10, 1, FREQUENCY, {true},
                                      1, spp::sparse_hash_set<std::string>(),
                                      spp::sparse_hash_set<std::string>(), 10, "",
                                      30, 4, "",
                                      Index::TYPO_TOKENS_THRESHOLD, "", "", {},
                                      3, "<mark>", "</mark>", {},
                                      UINT32_MAX, true, false, true,
                                      "", false, 6000*1000, 4, 7,
                                      fallback, 4, {off}, INT16_MAX, INT16_MAX,
                                      2, 2, false, "",
                                      true, 0, max_score, 100,
                                      0, 0, VALUE, 30000,
                                      2, "", {"product.specification.detail.width"});

    if(!facet_search.ok()) {
        LOG(ERROR) << facet_search.error();
        FAIL();
    }

    auto facet_results = facet_search.get();
    ASSERT_EQ(1, facet_results["facet_counts"].size());
    auto& width_counts = facet_results["facet_counts"][0]["counts"];
    ASSERT_EQ(2, width_counts.size());
    // The width-30 entry is expected ahead of the width-25 entry.
    ASSERT_EQ("{\"specification\":{\"detail\":{\"width\":30}}}", width_counts[0]["value"]);
    ASSERT_EQ("{\"specification\":{\"detail\":{\"width\":25}}}", width_counts[1]["value"]);
}
|
||||
|
||||
// Verifies return-parent faceting when the schema declares the parent itself
// (`value`) as a faceted object field and the query facets on the child
// `value.color`.
TEST_F(CollectionOptimizedFacetingTest, FacetingReturnParentObject) {
    nlohmann::json schema = R"({
        "name": "coll1",
        "enable_nested_fields": true,
        "fields": [
          {"name": "value", "type": "object", "optional": false, "facet": true }
        ]
    })"_json;

    auto create_op = collectionManager.create_collection(schema);
    ASSERT_TRUE(create_op.ok());
    Collection* coll1 = create_op.get();

    nlohmann::json red_doc = R"({
        "value": {
            "color": "red",
            "r": 255,
            "g": 0,
            "b": 0
        }
    })"_json;

    nlohmann::json blue_doc = R"({
        "value": {
            "color": "blue",
            "r": 0,
            "g": 0,
            "b": 255
        }
    })"_json;

    // Index both documents, red first, then blue.
    for(const nlohmann::json* doc : {&red_doc, &blue_doc}) {
        auto add_op = coll1->add(doc->dump(), CREATE);
        ASSERT_TRUE(add_op.ok());
    }

    auto facet_search = coll1->search("*", {}, "", {"value.color"},
                                      {}, {2}, 10, 1, FREQUENCY, {true},
                                      1, spp::sparse_hash_set<std::string>(),
                                      spp::sparse_hash_set<std::string>(), 10, "",
                                      30, 4, "",
                                      Index::TYPO_TOKENS_THRESHOLD, "", "", {},
                                      3, "<mark>", "</mark>", {},
                                      UINT32_MAX, true, false, true,
                                      "", false, 6000*1000, 4, 7,
                                      fallback, 4, {off}, INT16_MAX, INT16_MAX,
                                      2, 2, false, "",
                                      true, 0, max_score, 100,
                                      0, 0, VALUE, 30000,
                                      2, "", {"value.color"});

    if(!facet_search.ok()) {
        LOG(ERROR) << facet_search.error();
        FAIL();
    }

    auto facet_results = facet_search.get();
    ASSERT_EQ(1, facet_results["facet_counts"].size());
    auto& color_counts = facet_results["facet_counts"][0]["counts"];
    ASSERT_EQ(2, color_counts.size());
    ASSERT_EQ("{\"b\":0,\"color\":\"red\",\"g\":0,\"r\":255}", color_counts[0]["value"]);
    ASSERT_EQ("{\"b\":255,\"color\":\"blue\",\"g\":0,\"r\":0}", color_counts[1]["value"]);
}
|
||||
|
||||
TEST_F(CollectionOptimizedFacetingTest, FacetSortByAlpha) {
|
||||
nlohmann::json schema = R"({
|
||||
"name": "coll1",
|
||||
|
Loading…
x
Reference in New Issue
Block a user