From ced85b446d874ddf8a2579784b7596bde5c85962 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Tue, 15 Aug 2023 13:14:20 +0530 Subject: [PATCH] Avoid needless looping for value based facet query. --- src/index.cpp | 57 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/src/index.cpp b/src/index.cpp index de07a27a..49f38451 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -1314,11 +1314,11 @@ void Index::do_facets(std::vector & facets, facet_query_t & facet_query, facet_count_t& facet_count = a_facet.result_map[kv.first]; facet_count.count = kv.second; - a_facet.hash_tokens[kv.first] = fquery_hashes.at(facet_field.name); + a_facet.hash_tokens[kv.first] = searched_tokens; } } } - + } else { facet_count_t& facet_count = a_facet.result_map[kv.first]; facet_count.count = kv.second; @@ -1331,7 +1331,7 @@ void Index::do_facets(std::vector & facets, facet_query_t & facet_query, compute_facet_stats(a_facet, kv.first, facet_field.type); } } - } + } } else { //LOG(INFO) << "Using hashing to find facets"; bool facet_hash_index_exists = facet_index_v4->has_hash_index(facet_field.name); @@ -4463,33 +4463,48 @@ void Index::compute_facet_infos(const std::vector& facets, facet_query_t& //LOG(INFO) << "si: " << si << ", field_result_ids_len: " << field_result_ids_len; - for(size_t i = 0; i < field_result_ids_len; i++) { - uint32_t seq_id = field_result_ids[i]; - bool id_matched = true; - +#ifdef TEST_BUILD + if(facet_index_type == VALUE) { +#else + if(facet_value_index_exists && facet_infos[findex].use_value_index) { +#endif + size_t num_tokens_found = 0; for(auto pl: posting_lists) { - if(!posting_t::contains(pl, seq_id)) { - // need to ensure that document ID actually contains searched_query tokens - // since `field_result_ids` contains documents matched across all queries - id_matched = false; + if(posting_t::contains_atleast_one(pl, field_result_ids, field_result_ids_len)) { + num_tokens_found++; + } else { break; } } - if(!id_matched) { - continue; - } - - #ifdef TEST_BUILD - if(facet_index_type == VALUE) { - #else - if(facet_value_index_exists && facet_infos[findex].use_value_index) { - #endif + if(num_tokens_found == posting_lists.size()) { + // need to ensure that document ID actually contains searched_query tokens + // since `field_result_ids` contains documents matched across all queries // value based index for(const auto& val : searched_tokens) { facet_infos[findex].hashes[facet_field.name].emplace_back(val); } - } else { + } + } + + else { + for(size_t i = 0; i < field_result_ids_len; i++) { + uint32_t seq_id = field_result_ids[i]; + bool id_matched = true; + + for(auto pl: posting_lists) { + if(!posting_t::contains(pl, seq_id)) { + // need to ensure that document ID actually contains searched_query tokens + // since `field_result_ids` contains documents matched across all queries + id_matched = false; + break; + } + } + + if(!id_matched) { + continue; + } + std::vector facet_hashes; auto facet_index = facet_index_v4->get_facet_hash_index(a_facet.field_name); posting_list_t::iterator_t facet_index_it = facet_index->new_iterator();