From 574a46d77ddfcd1a380c3709430543d93ac17486 Mon Sep 17 00:00:00 2001 From: krunal1313 Date: Wed, 19 Apr 2023 14:58:29 +0530 Subject: [PATCH] review patch by @kishorenc --- include/facet_index.h | 2 +- include/index.h | 18 +++++------ src/facet_index.cpp | 26 +++++++-------- src/index.cpp | 74 ++++++++++++++++++++----------------------- 4 files changed, 58 insertions(+), 62 deletions(-) diff --git a/include/facet_index.h b/include/facet_index.h index 496c096c..6cf0bb84 100644 --- a/include/facet_index.h +++ b/include/facet_index.h @@ -51,7 +51,7 @@ public: size_t get_facet_count(const std::string& field); - int intersect(const std::string& val, const uint32_t* result_ids, int result_id_len, + size_t intersect(const std::string& val, const uint32_t* result_ids, int result_id_len, int max_facet_count, std::map& found, bool is_wildcard_no_filter_query); std::string get_facet_by_count_index(const std::string& field, uint32_t count_index); diff --git a/include/index.h b/include/index.h index 3e3a2cf9..2bdc6fb6 100644 --- a/include/index.h +++ b/include/index.h @@ -506,16 +506,16 @@ private: void insert_doc(const int64_t score, art_tree *t, uint32_t seq_id, const std::unordered_map> &token_to_offsets) const; - static void tokenize_string_with_facets(const std::string& text, bool is_facet, const field& a_field, - const std::vector& symbols_to_index, - const std::vector& token_separators, - std::unordered_map>& token_to_offsets); + static void tokenize_string(const std::string& text, bool is_facet, const field& a_field, + const std::vector& symbols_to_index, + const std::vector& token_separators, + std::unordered_map>& token_to_offsets); - static void tokenize_string_array_with_facets(const std::vector& strings, bool is_facet, - const field& a_field, - const std::vector& symbols_to_index, - const std::vector& token_separators, - std::unordered_map>& token_to_offsets); + static void tokenize_string_array(const std::vector& strings, bool is_facet, + const field& a_field, + const std::vector& symbols_to_index, + const std::vector& token_separators, + std::unordered_map>& token_to_offsets); void collate_included_ids(const std::vector& q_included_tokens, const std::map> & included_ids_map, diff --git a/src/facet_index.cpp b/src/facet_index.cpp index cccb01ec..e00a4c9b 100644 --- a/src/facet_index.cpp +++ b/src/facet_index.cpp @@ -7,6 +7,7 @@ uint32_t facet_index_t::insert(const std::string& field, const std::string& valu uint32_t index; const auto sv = value.substr(0, 100); const auto it = facet_index_map.find(sv); + if(it == facet_index_map.end()) { index = ++count_index; @@ -14,7 +15,7 @@ uint32_t facet_index_t::insert(const std::string& field, const std::string& valu fis.id_list_ptr = SET_COMPACT_IDS(compact_id_list_t::create(1, {id})); fis.index = index; facet_index_map.emplace(sv, fis); - }else { + } else { auto ids = it->id_list_ptr; if (!ids_t::contains(ids, id)) { ids_t::upsert(ids, id); @@ -25,17 +26,16 @@ uint32_t facet_index_t::insert(const std::string& field, const std::string& valu const auto facet_count = ids_t::num_ids(facet_index_map.at(sv).id_list_ptr); //LOG(INFO) << "Facet count in facet " << sv << " : " << facet_count; auto& counter_list = facet_field_map[field].counter_list; - count_list node(sv, facet_count); if(counter_list.empty()) { - counter_list.emplace_back(count_list(sv, facet_count)); + counter_list.emplace_back(sv, facet_count); } else { - auto it = counter_list.begin(); + auto counter_it = counter_list.begin(); //remove node from list - for(it = counter_list.begin(); it != counter_list.end(); ++it) { - if(it->facet_value == sv) { + for(counter_it = counter_list.begin(); counter_it != counter_list.end(); ++counter_it) { + if(counter_it->facet_value == sv) { //found facet in first node - counter_list.erase(it); + counter_list.erase(counter_it); break; } } @@ -43,15 +43,15 @@ uint32_t facet_index_t::insert(const std::string& field, const std::string& valu //find position in list and add node with updated count count_list node(sv, facet_count); - for(it = counter_list.begin(); it != counter_list.end(); ++it) { + for(counter_it = counter_list.begin(); counter_it != counter_list.end(); ++counter_it) { // LOG (INFO) << "inserting in middle or front facet " << node.facet_value // << " with count " << node.count; - if(it->count <= facet_count) { - counter_list.emplace(it, node); + if(counter_it->count <= facet_count) { + counter_list.emplace(counter_it, node); break; } } - if(it == counter_list.end()) { + if(counter_it == counter_list.end()) { // LOG (INFO) << "inserting at last facet " << node.facet_value // << " with count " << node.count; counter_list.emplace_back(node); @@ -89,7 +89,7 @@ size_t facet_index_t::get_facet_count(const std::string& field) { } //returns the count of matching seq_ids from result array -int facet_index_t::intersect(const std::string& field, const uint32_t* result_ids, +size_t facet_index_t::intersect(const std::string& field, const uint32_t* result_ids, int result_ids_len, int max_facet_count, std::map& found, bool is_wildcard_no_filter_query) { //LOG (INFO) << "intersecting field " << field; @@ -109,7 +109,7 @@ int facet_index_t::intersect(const std::string& field, const uint32_t* result_id for(const auto& counter_list_it : counter_list) { // LOG (INFO) << "checking ids in facet_value " << counter_list_it.facet_value // << " having total count " << counter_list_it.count; - int count = 0; + uint32_t count = 0; if(is_wildcard_no_filter_query) { count = counter_list_it.count; diff --git a/src/index.cpp b/src/index.cpp index 67432d69..638f7837 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -266,7 +266,7 @@ void Index::compute_token_offsets_facets(index_record& record, continue; } - offsets_facet_hashes_t offset_facet_hashes; + std::unordered_map> offsets; bool is_facet = search_schema.at(field_name).facet; @@ -293,9 +293,9 @@ void Index::compute_token_offsets_facets(index_record& record, } } - tokenize_string_array_with_facets(strings, is_facet, the_field, - local_symbols_to_index, local_token_separators, - offset_facet_hashes.offsets/*, offset_facet_hashes.facet_hashes*/); + tokenize_string_array(strings, is_facet, the_field, + local_symbols_to_index, local_token_separators, + offsets); } else { std::string text; @@ -309,27 +309,27 @@ void Index::compute_token_offsets_facets(index_record& record, text = std::to_string(document[field_name].get()); } - tokenize_string_with_facets(text, is_facet, the_field, - local_symbols_to_index, local_token_separators, - offset_facet_hashes.offsets/*, offset_facet_hashes.facet_hashes*/); + tokenize_string(text, is_facet, the_field, + local_symbols_to_index, local_token_separators, + offsets); } } if(the_field.is_string()) { if(the_field.type == field_types::STRING) { - tokenize_string_with_facets(document[field_name], is_facet, the_field, - local_symbols_to_index, local_token_separators, - offset_facet_hashes.offsets); + tokenize_string(document[field_name], is_facet, the_field, + local_symbols_to_index, local_token_separators, + offsets); } else { - tokenize_string_array_with_facets(document[field_name], is_facet, the_field, - local_symbols_to_index, local_token_separators, - offset_facet_hashes.offsets); + tokenize_string_array(document[field_name], is_facet, the_field, + local_symbols_to_index, local_token_separators, + offsets); } } - if(!offset_facet_hashes.offsets.empty()) { - record.field_index.emplace(field_name, std::move(offset_facet_hashes)); + if(!offsets.empty()) { + record.field_index.emplace(field_name, std::move(offsets)); } } } @@ -659,38 +659,34 @@ void Index::index_field_in_memory(const field& afield, std::vector facet_hash_values_t fhashvalues; if(afield.type == field_types::INT32_ARRAY) { - for(int i = 0; i < document[afield.name].size(); ++i) { + for(size_t i = 0; i < document[afield.name].size(); ++i) { int32_t raw_val = document[afield.name][i].get(); value = std::to_string(raw_val); auto index = facet_index_v4->insert(afield.name, value, seq_id); fhashvalues.hashes.emplace_back(index); } - } - else if(afield.type == field_types::INT64_ARRAY) { - for(int i = 0; i < document[afield.name].size(); ++i) { + } else if(afield.type == field_types::INT64_ARRAY) { + for(size_t i = 0; i < document[afield.name].size(); ++i) { int64_t raw_val = document[afield.name][i].get(); value = std::to_string(raw_val); auto index = facet_index_v4->insert(afield.name, value, seq_id); fhashvalues.hashes.emplace_back(index); } - } - else if(afield.type == field_types::STRING_ARRAY) { - for(int i = 0; i < document[afield.name].size(); ++i) { + } else if(afield.type == field_types::STRING_ARRAY) { + for(size_t i = 0; i < document[afield.name].size(); ++i) { value = document[afield.name][i]; auto index = facet_index_v4->insert(afield.name, value, seq_id); fhashvalues.hashes.emplace_back(index); } - } - else if(afield.type == field_types::FLOAT_ARRAY) { - for(int i = 0; i < document[afield.name].size(); ++i) { + } else if(afield.type == field_types::FLOAT_ARRAY) { + for(size_t i = 0; i < document[afield.name].size(); ++i) { float raw_val = document[afield.name][i].get(); value = StringUtils::float_to_str(raw_val); auto index = facet_index_v4->insert(afield.name, value, seq_id); fhashvalues.hashes.emplace_back(index); } - } - else if(afield.type == field_types::BOOL_ARRAY) { - for(int i = 0; i < document[afield.name].size(); ++i) { + } else if(afield.type == field_types::BOOL_ARRAY) { + for(size_t i = 0; i < document[afield.name].size(); ++i) { value = std::to_string(document[afield.name][i].get()); auto index = facet_index_v4->insert(afield.name, value, seq_id); fhashvalues.hashes.emplace_back(index); @@ -1028,10 +1024,10 @@ void Index::index_field_in_memory(const field& afield, std::vector } } -void Index::tokenize_string_with_facets(const std::string& text, bool is_facet, const field& a_field, - const std::vector& symbols_to_index, - const std::vector& token_separators, - std::unordered_map>& token_to_offsets) { +void Index::tokenize_string(const std::string& text, bool is_facet, const field& a_field, + const std::vector& symbols_to_index, + const std::vector& token_separators, + std::unordered_map>& token_to_offsets) { Tokenizer tokenizer(text, true, !a_field.is_string(), a_field.locale, symbols_to_index, token_separators); std::string token; @@ -1057,11 +1053,11 @@ void Index::tokenize_string_with_facets(const std::string& text, bool is_facet, } } -void Index::tokenize_string_array_with_facets(const std::vector& strings, bool is_facet, - const field& a_field, - const std::vector& symbols_to_index, - const std::vector& token_separators, - std::unordered_map>& token_to_offsets) { +void Index::tokenize_string_array(const std::vector& strings, bool is_facet, + const field& a_field, + const std::vector& symbols_to_index, + const std::vector& token_separators, + std::unordered_map>& token_to_offsets) { for(size_t array_index = 0; array_index < strings.size(); array_index++) { const std::string& str = strings[array_index]; @@ -1170,8 +1166,8 @@ void Index::do_facets(std::vector & facets, facet_query_t & facet_query, const auto facet_records = facet_index_v4->get_facet_count(a_facet.field_name); - if(results_size && facet_records && (facet_records <= 10 || is_wildcard_query == true) - && use_facet_query == false && group_limit == 0 && no_filters_provided == true) { + if(results_size && facet_records && (facet_records <= 10 || is_wildcard_query) && + !use_facet_query && group_limit == 0 && no_filters_provided) { //LOG(INFO) << "Using intersection to find facets"; a_facet.is_intersected = true;