fixed crash and other changes

This commit is contained in:
krunal1313 2023-06-07 16:12:35 +05:30
parent 4127c80aea
commit 6f5f894ee5
4 changed files with 64 additions and 68 deletions

View File

@ -32,10 +32,12 @@ private:
struct facet_index_struct {
void* id_list_ptr;
uint32_t index;
uint32_t count_list_index;
facet_index_struct() {
id_list_ptr = nullptr;
index = UINT32_MAX;
count_list_index = UINT32_MAX;
}
~facet_index_struct() {};
@ -72,8 +74,8 @@ public:
~facet_index_t();
void insert(const std::string& field, const std::string& value,
const std::vector<uint32_t>& ids, uint32_t index);
uint32_t insert(const std::string& field, const std::string& value,
const std::vector<uint32_t>& ids, bool is_string=false);
void erase(const std::string& field);

View File

@ -310,11 +310,6 @@ private:
facet_index_t* facet_index_v4 = nullptr;
//for string and int64 facets insertions
uint32_t count_index;
std::unordered_map<std::string, uint32_t> count_index_map;
// sort_field => (seq_id => value)
spp::sparse_hash_map<std::string, spp::sparse_hash_map<uint32_t, int64_t>*> sort_index;
typedef spp::sparse_hash_map<std::string,

View File

@ -10,63 +10,73 @@ void facet_index_t::initialize(const std::string& field) {
}
}
void facet_index_t::insert(const std::string& field, const std::string& value,
const std::vector<uint32_t>& ids, uint32_t index) {
uint32_t facet_index_t::insert(const std::string& field, const std::string& value,
const std::vector<uint32_t>& ids, bool is_string) {
const auto facet_field_map_it = facet_field_map.find(field);
if(facet_field_map_it == facet_field_map.end()) {
return; //field is not initialized or dropped
return 0; //field is not initialized or dropped
}
uint32_t index = 0;
auto& facet_index_map = facet_field_map_it->second.facet_index_map;
const auto sv = value.substr(0, 100);
const auto it = facet_index_map.find(sv);
const auto facet_index_map_it = facet_index_map.find(sv);
bool inserted_newly = false;
if(it == facet_index_map.end()) {
if(facet_index_map_it == facet_index_map.end()) {
index = ++count_index;
facet_index_struct fis{};
fis.index = index;
fis.id_list_ptr = SET_COMPACT_IDS(compact_id_list_t::create(ids.size(), ids));
if(is_string) {
fis.id_list_ptr = SET_COMPACT_IDS(compact_id_list_t::create(ids.size(), ids));
}
facet_index_map.emplace(sv, fis);
inserted_newly = true;
} else {
auto ids_ptr = it->id_list_ptr;
for(const auto& id : ids) {
if (!ids_t::contains(ids_ptr, id)) {
ids_t::upsert(ids_ptr, id);
facet_index_map[sv].id_list_ptr = ids_ptr;
index = facet_index_map_it->index;
if(is_string) {
auto ids_ptr = facet_index_map_it->id_list_ptr;
for(const auto& id : ids) {
if (!ids_t::contains(ids_ptr, id)) {
ids_t::upsert(ids_ptr, id);
facet_index_map[sv].id_list_ptr = ids_ptr;
}
}
}
}
const auto facet_count = ids_t::num_ids(facet_index_map.at(sv).id_list_ptr);
//LOG(INFO) << "Facet count in facet " << sv << " : " << facet_count;
auto& counter_list = facet_field_map_it->second.counter_list;
if(is_string) {
const auto facet_count = ids_t::num_ids(facet_index_map.at(sv).id_list_ptr);
//LOG(INFO) << "Facet count in facet " << sv << " : " << facet_count;
auto& counter_list = facet_field_map_it->second.counter_list;
if(inserted_newly) {
count_list* node = new count_list(sv, facet_count, index);
counter_list.emplace_back(node);
facet_index_map.at(sv).count_list_index = counter_list.size()-1;
} else {
auto ind = facet_index_map_it->count_list_index;
if(counter_list.empty()) {
count_list* node = new count_list(sv, facet_count, index);
counter_list.emplace_back(node);
} else {
auto ind = 0;
for(; ind < counter_list.size(); ++ind) {
if(counter_list[ind]->index == index) {
counter_list[ind]->count = facet_count;
if(ind > 1) {
auto curr = ind;
while (curr && (counter_list[curr-1]->count < counter_list[curr]->count)) {
std::swap(counter_list[curr-1], counter_list[curr]);
facet_index_map.at(counter_list[curr-1]->facet_value).count_list_index = curr-1;
facet_index_map.at(counter_list[curr]->facet_value).count_list_index = curr;
--curr;
}
}
break;
} else {
LOG(ERROR) << "Wrong count_index stored for facet " << sv << " with index " << index;
}
}
if(ind == counter_list.size()) {
// LOG (INFO) << "inserting at last facet " << node.facet_value
// << " with count " << node.count;
count_list* node = new count_list(sv, facet_count, index);
counter_list.emplace_back(node);
}
}
return index;
}
bool facet_index_t::contains(const std::string& field) {

View File

@ -328,7 +328,7 @@ void Index::compute_token_offsets_facets(index_record& record,
offsets);
std::string val = document[field_name];
if(!val.empty()) {
facet_hashes[val].push_back(record.seq_id);
facet_hashes[val].emplace_back(record.seq_id);
}
} else {
tokenize_string_array(document[field_name], is_facet, the_field,
@ -603,9 +603,12 @@ size_t Index::batch_memory_index(Index *index, std::vector<index_record>& iter_b
found_fields.insert(kv.key());
}
for(const auto& kv : index_rec.facet_hashes) {
std::copy(kv.second.begin(), kv.second.end(),
std::back_inserter(facet_hashes[kv.first]));
for(auto& kv : index_rec.facet_hashes) {
auto end = facet_hashes[kv.first].end();
facet_hashes[kv.first].insert(end, std::make_move_iterator(kv.second.begin()),
std::make_move_iterator(kv.second.end()));
kv.second.clear();
}
}
@ -758,7 +761,7 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
continue;
}
std::string value;
std::string value="";
uint32_t fhash = 0;
if(afield.facet) {
@ -771,19 +774,10 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
} else if(afield.type == field_types::INT64_ARRAY) {
int64_t raw_val = document[afield.name][i].get<int64_t>();
value = std::to_string(raw_val);
auto it = count_index_map.find(value);
if(it == count_index_map.end()) {
count_index_map.emplace(value, ++count_index);
}
fhash = count_index_map.at(value);
fhash = facet_index_v4->insert(afield.name, value, facet_hashes[value]);
} else if(afield.type == field_types::STRING_ARRAY) {
value = document[afield.name][i];
auto it = count_index_map.find(value);
if(it == count_index_map.end()) {
count_index_map.emplace(value, ++count_index);
}
fhash = count_index_map.at(value);
facet_index_v4->insert(afield.name, value, facet_hashes[value], fhash);
fhash = facet_index_v4->insert(afield.name, value, facet_hashes[value], true);
} else if(afield.type == field_types::FLOAT_ARRAY) {
float raw_val = document[afield.name][i].get<float>();
fhash = reinterpret_cast<uint32_t&>(raw_val);
@ -797,9 +791,11 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
}
}
if(facet_index && facet_threshold_count > FACET_INDEX_THRESHOLD) {
facet_index->upsert(seq_id, std::move(fhashvalues));
fhashvalues.clear();
if(facet_index!=nullptr) {
if (facet_threshold_count > FACET_INDEX_THRESHOLD) {
facet_index->upsert(seq_id, std::move(fhashvalues));
fhashvalues.clear();
}
} else {
LOG(ERROR) << "facet_index was null while inserting for facet " << afield.name;
}
@ -813,20 +809,11 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
else if(afield.type == field_types::INT64) {
int64_t raw_val = document[afield.name].get<int64_t>();
value = std::to_string(raw_val);
auto it = count_index_map.find(value);
if(it == count_index_map.end()) {
count_index_map.emplace(value, ++count_index);
}
fhash = count_index_map.at(value);
fhash = facet_index_v4->insert(afield.name, value, facet_hashes[value]);
}
else if(afield.type == field_types::STRING) {
value = document[afield.name];
auto it = count_index_map.find(value);
if(it == count_index_map.end()) {
count_index_map.emplace(value, ++count_index);
}
fhash = count_index_map.at(value);
facet_index_v4->insert(afield.name, value, facet_hashes[value], fhash);
fhash = facet_index_v4->insert(afield.name, value, facet_hashes[value], true);
}
else if(afield.type == field_types::FLOAT) {
float raw_val = document[afield.name].get<float>();
@ -837,8 +824,10 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
fhash = (uint32_t)raw_val;
}
if(facet_index && facet_threshold_count > FACET_INDEX_THRESHOLD) {
facet_index->upsert(seq_id, {fhash});
if(facet_index!=nullptr) {
if (facet_threshold_count > FACET_INDEX_THRESHOLD) {
facet_index->upsert(seq_id, {fhash});
}
} else {
LOG(ERROR) << "facet_index was null while inserting for facet " << afield.name;
}
@ -1213,7 +1202,7 @@ void Index::tokenize_string_array(const std::vector<std::string>& strings, bool
}
void Index::initialize_facet_indexes(const field& facet_field) {
if(facet_field.is_string()) {
if(facet_field.is_string() || facet_field.is_int64()) {
facet_index_v4->initialize(facet_field.name);
}