From 6f5f894ee530bfbc3f90ba692ba21913af6a4798 Mon Sep 17 00:00:00 2001 From: krunal1313 Date: Wed, 7 Jun 2023 16:12:35 +0530 Subject: [PATCH] fixed crash and other changes --- include/facet_index.h | 6 ++-- include/index.h | 5 ---- src/facet_index.cpp | 66 +++++++++++++++++++++++++------------------ src/index.cpp | 55 +++++++++++++++--------------------- 4 files changed, 64 insertions(+), 68 deletions(-) diff --git a/include/facet_index.h b/include/facet_index.h index 124eaf72..fb9541d3 100644 --- a/include/facet_index.h +++ b/include/facet_index.h @@ -32,10 +32,12 @@ private: struct facet_index_struct { void* id_list_ptr; uint32_t index; + uint32_t count_list_index; facet_index_struct() { id_list_ptr = nullptr; index = UINT32_MAX; + count_list_index = UINT32_MAX; } ~facet_index_struct() {}; @@ -72,8 +74,8 @@ public: ~facet_index_t(); - void insert(const std::string& field, const std::string& value, - const std::vector& ids, uint32_t index); + uint32_t insert(const std::string& field, const std::string& value, + const std::vector& ids, bool is_string=false); void erase(const std::string& field); diff --git a/include/index.h b/include/index.h index 4630cd63..a3aaa9e6 100644 --- a/include/index.h +++ b/include/index.h @@ -310,11 +310,6 @@ private: facet_index_t* facet_index_v4 = nullptr; - //for string and int64 facets insertions - uint32_t count_index; - - std::unordered_map count_index_map; - // sort_field => (seq_id => value) spp::sparse_hash_map*> sort_index; typedef spp::sparse_hash_map& ids, uint32_t index) { +uint32_t facet_index_t::insert(const std::string& field, const std::string& value, + const std::vector& ids, bool is_string) { const auto facet_field_map_it = facet_field_map.find(field); if(facet_field_map_it == facet_field_map.end()) { - return; //field is not initialized or dropped + return 0; //field is not initialized or dropped } + + uint32_t index = 0; auto& facet_index_map = facet_field_map_it->second.facet_index_map; const auto sv = value.substr(0, 100); - const auto it = facet_index_map.find(sv); + const auto facet_index_map_it = facet_index_map.find(sv); + bool inserted_newly = false; - if(it == facet_index_map.end()) { + if(facet_index_map_it == facet_index_map.end()) { + index = ++count_index; facet_index_struct fis{}; fis.index = index; - fis.id_list_ptr = SET_COMPACT_IDS(compact_id_list_t::create(ids.size(), ids)); + if(is_string) { + fis.id_list_ptr = SET_COMPACT_IDS(compact_id_list_t::create(ids.size(), ids)); + } facet_index_map.emplace(sv, fis); + inserted_newly = true; } else { - auto ids_ptr = it->id_list_ptr; - for(const auto& id : ids) { - if (!ids_t::contains(ids_ptr, id)) { - ids_t::upsert(ids_ptr, id); - facet_index_map[sv].id_list_ptr = ids_ptr; + index = facet_index_map_it->index; + if(is_string) { + auto ids_ptr = facet_index_map_it->id_list_ptr; + for(const auto& id : ids) { + if (!ids_t::contains(ids_ptr, id)) { + ids_t::upsert(ids_ptr, id); + facet_index_map[sv].id_list_ptr = ids_ptr; + } } } } - const auto facet_count = ids_t::num_ids(facet_index_map.at(sv).id_list_ptr); - //LOG(INFO) << "Facet count in facet " << sv << " : " << facet_count; - auto& counter_list = facet_field_map_it->second.counter_list; + if(is_string) { + const auto facet_count = ids_t::num_ids(facet_index_map.at(sv).id_list_ptr); + //LOG(INFO) << "Facet count in facet " << sv << " : " << facet_count; + auto& counter_list = facet_field_map_it->second.counter_list; + + if(inserted_newly) { + count_list* node = new count_list(sv, facet_count, index); + counter_list.emplace_back(node); + facet_index_map.at(sv).count_list_index = counter_list.size()-1; + } else { + auto ind = facet_index_map_it->count_list_index; - if(counter_list.empty()) { - count_list* node = new count_list(sv, facet_count, index); - counter_list.emplace_back(node); - } else { - auto ind = 0; - - for(; ind < counter_list.size(); ++ind) { if(counter_list[ind]->index == index) { counter_list[ind]->count = facet_count; if(ind > 1) { auto curr = ind; while (curr && (counter_list[curr-1]->count < counter_list[curr]->count)) { std::swap(counter_list[curr-1], counter_list[curr]); + facet_index_map.at(counter_list[curr-1]->facet_value).count_list_index = curr-1; + facet_index_map.at(counter_list[curr]->facet_value).count_list_index = curr; --curr; } } - break; + } else { + LOG(ERROR) << "Wrong count_index stored for facet " << sv << " with index " << index; } } - if(ind == counter_list.size()) { - // LOG (INFO) << "inserting at last facet " << node.facet_value - // << " with count " << node.count; - count_list* node = new count_list(sv, facet_count, index); - counter_list.emplace_back(node); - } } + + return index; } bool facet_index_t::contains(const std::string& field) { diff --git a/src/index.cpp b/src/index.cpp index a6cee7c3..47e97945 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -328,7 +328,7 @@ void Index::compute_token_offsets_facets(index_record& record, offsets); std::string val = document[field_name]; if(!val.empty()) { - facet_hashes[val].push_back(record.seq_id); + facet_hashes[val].emplace_back(record.seq_id); } } else { tokenize_string_array(document[field_name], is_facet, the_field, @@ -603,9 +603,12 @@ size_t Index::batch_memory_index(Index *index, std::vector& iter_b found_fields.insert(kv.key()); } - for(const auto& kv : index_rec.facet_hashes) { - std::copy(kv.second.begin(), kv.second.end(), - std::back_inserter(facet_hashes[kv.first])); + for(auto& kv : index_rec.facet_hashes) { + auto end = facet_hashes[kv.first].end(); + + facet_hashes[kv.first].insert(end, std::make_move_iterator(kv.second.begin()), + std::make_move_iterator(kv.second.end())); + kv.second.clear(); } } @@ -758,7 +761,7 @@ void Index::index_field_in_memory(const field& afield, std::vector continue; } - std::string value; + std::string value=""; uint32_t fhash = 0; if(afield.facet) { @@ -771,19 +774,10 @@ void Index::index_field_in_memory(const field& afield, std::vector } else if(afield.type == field_types::INT64_ARRAY) { int64_t raw_val = document[afield.name][i].get(); value = std::to_string(raw_val); - auto it = count_index_map.find(value); - if(it == count_index_map.end()) { - count_index_map.emplace(value, ++count_index); - } - fhash = count_index_map.at(value); + fhash = facet_index_v4->insert(afield.name, value, facet_hashes[value]); } else if(afield.type == field_types::STRING_ARRAY) { value = document[afield.name][i]; - auto it = count_index_map.find(value); - if(it == count_index_map.end()) { - count_index_map.emplace(value, ++count_index); - } - fhash = count_index_map.at(value); - facet_index_v4->insert(afield.name, value, facet_hashes[value], fhash); + fhash = facet_index_v4->insert(afield.name, value, facet_hashes[value], true); } else if(afield.type == field_types::FLOAT_ARRAY) { float raw_val = document[afield.name][i].get(); fhash = reinterpret_cast(raw_val); @@ -797,9 +791,11 @@ void Index::index_field_in_memory(const field& afield, std::vector } } - if(facet_index && facet_threshold_count > FACET_INDEX_THRESHOLD) { - facet_index->upsert(seq_id, std::move(fhashvalues)); - fhashvalues.clear(); + if(facet_index!=nullptr) { + if (facet_threshold_count > FACET_INDEX_THRESHOLD) { + facet_index->upsert(seq_id, std::move(fhashvalues)); + fhashvalues.clear(); + } } else { LOG(ERROR) << "facet_index was null while inserting for facet " << afield.name; } @@ -813,20 +809,11 @@ void Index::index_field_in_memory(const field& afield, std::vector else if(afield.type == field_types::INT64) { int64_t raw_val = document[afield.name].get(); value = std::to_string(raw_val); - auto it = count_index_map.find(value); - if(it == count_index_map.end()) { - count_index_map.emplace(value, ++count_index); - } - fhash = count_index_map.at(value); + fhash = facet_index_v4->insert(afield.name, value, facet_hashes[value]); } else if(afield.type == field_types::STRING) { value = document[afield.name]; - auto it = count_index_map.find(value); - if(it == count_index_map.end()) { - count_index_map.emplace(value, ++count_index); - } - fhash = count_index_map.at(value); - facet_index_v4->insert(afield.name, value, facet_hashes[value], fhash); + fhash = facet_index_v4->insert(afield.name, value, facet_hashes[value], true); } else if(afield.type == field_types::FLOAT) { float raw_val = document[afield.name].get(); @@ -837,8 +824,10 @@ void Index::index_field_in_memory(const field& afield, std::vector fhash = (uint32_t)raw_val; } - if(facet_index && facet_threshold_count > FACET_INDEX_THRESHOLD) { - facet_index->upsert(seq_id, {fhash}); + if(facet_index!=nullptr) { + if (facet_threshold_count > FACET_INDEX_THRESHOLD) { + facet_index->upsert(seq_id, {fhash}); + } } else { LOG(ERROR) << "facet_index was null while inserting for facet " << afield.name; } @@ -1213,7 +1202,7 @@ void Index::tokenize_string_array(const std::vector& strings, bool } void Index::initialize_facet_indexes(const field& facet_field) { - if(facet_field.is_string()) { + if(facet_field.is_string() || facet_field.is_int64()) { facet_index_v4->initialize(facet_field.name); }