mirror of
https://github.com/typesense/typesense.git
synced 2025-05-20 21:52:23 +08:00
fixed crash and other changes
This commit is contained in:
parent
4127c80aea
commit
6f5f894ee5
@ -32,10 +32,12 @@ private:
|
||||
struct facet_index_struct {
|
||||
void* id_list_ptr;
|
||||
uint32_t index;
|
||||
uint32_t count_list_index;
|
||||
|
||||
facet_index_struct() {
|
||||
id_list_ptr = nullptr;
|
||||
index = UINT32_MAX;
|
||||
count_list_index = UINT32_MAX;
|
||||
}
|
||||
|
||||
~facet_index_struct() {};
|
||||
@ -72,8 +74,8 @@ public:
|
||||
|
||||
~facet_index_t();
|
||||
|
||||
void insert(const std::string& field, const std::string& value,
|
||||
const std::vector<uint32_t>& ids, uint32_t index);
|
||||
uint32_t insert(const std::string& field, const std::string& value,
|
||||
const std::vector<uint32_t>& ids, bool is_string=false);
|
||||
|
||||
void erase(const std::string& field);
|
||||
|
||||
|
@ -310,11 +310,6 @@ private:
|
||||
|
||||
facet_index_t* facet_index_v4 = nullptr;
|
||||
|
||||
//for string and int64 facets insertions
|
||||
uint32_t count_index;
|
||||
|
||||
std::unordered_map<std::string, uint32_t> count_index_map;
|
||||
|
||||
// sort_field => (seq_id => value)
|
||||
spp::sparse_hash_map<std::string, spp::sparse_hash_map<uint32_t, int64_t>*> sort_index;
|
||||
typedef spp::sparse_hash_map<std::string,
|
||||
|
@ -10,63 +10,73 @@ void facet_index_t::initialize(const std::string& field) {
|
||||
}
|
||||
}
|
||||
|
||||
void facet_index_t::insert(const std::string& field, const std::string& value,
|
||||
const std::vector<uint32_t>& ids, uint32_t index) {
|
||||
uint32_t facet_index_t::insert(const std::string& field, const std::string& value,
|
||||
const std::vector<uint32_t>& ids, bool is_string) {
|
||||
|
||||
const auto facet_field_map_it = facet_field_map.find(field);
|
||||
if(facet_field_map_it == facet_field_map.end()) {
|
||||
return; //field is not initialized or dropped
|
||||
return 0; //field is not initialized or dropped
|
||||
}
|
||||
|
||||
uint32_t index = 0;
|
||||
|
||||
auto& facet_index_map = facet_field_map_it->second.facet_index_map;
|
||||
const auto sv = value.substr(0, 100);
|
||||
const auto it = facet_index_map.find(sv);
|
||||
const auto facet_index_map_it = facet_index_map.find(sv);
|
||||
bool inserted_newly = false;
|
||||
|
||||
if(it == facet_index_map.end()) {
|
||||
if(facet_index_map_it == facet_index_map.end()) {
|
||||
index = ++count_index;
|
||||
facet_index_struct fis{};
|
||||
fis.index = index;
|
||||
fis.id_list_ptr = SET_COMPACT_IDS(compact_id_list_t::create(ids.size(), ids));
|
||||
if(is_string) {
|
||||
fis.id_list_ptr = SET_COMPACT_IDS(compact_id_list_t::create(ids.size(), ids));
|
||||
}
|
||||
facet_index_map.emplace(sv, fis);
|
||||
inserted_newly = true;
|
||||
} else {
|
||||
auto ids_ptr = it->id_list_ptr;
|
||||
for(const auto& id : ids) {
|
||||
if (!ids_t::contains(ids_ptr, id)) {
|
||||
ids_t::upsert(ids_ptr, id);
|
||||
facet_index_map[sv].id_list_ptr = ids_ptr;
|
||||
index = facet_index_map_it->index;
|
||||
if(is_string) {
|
||||
auto ids_ptr = facet_index_map_it->id_list_ptr;
|
||||
for(const auto& id : ids) {
|
||||
if (!ids_t::contains(ids_ptr, id)) {
|
||||
ids_t::upsert(ids_ptr, id);
|
||||
facet_index_map[sv].id_list_ptr = ids_ptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const auto facet_count = ids_t::num_ids(facet_index_map.at(sv).id_list_ptr);
|
||||
//LOG(INFO) << "Facet count in facet " << sv << " : " << facet_count;
|
||||
auto& counter_list = facet_field_map_it->second.counter_list;
|
||||
if(is_string) {
|
||||
const auto facet_count = ids_t::num_ids(facet_index_map.at(sv).id_list_ptr);
|
||||
//LOG(INFO) << "Facet count in facet " << sv << " : " << facet_count;
|
||||
auto& counter_list = facet_field_map_it->second.counter_list;
|
||||
|
||||
if(inserted_newly) {
|
||||
count_list* node = new count_list(sv, facet_count, index);
|
||||
counter_list.emplace_back(node);
|
||||
facet_index_map.at(sv).count_list_index = counter_list.size()-1;
|
||||
} else {
|
||||
auto ind = facet_index_map_it->count_list_index;
|
||||
|
||||
if(counter_list.empty()) {
|
||||
count_list* node = new count_list(sv, facet_count, index);
|
||||
counter_list.emplace_back(node);
|
||||
} else {
|
||||
auto ind = 0;
|
||||
|
||||
for(; ind < counter_list.size(); ++ind) {
|
||||
if(counter_list[ind]->index == index) {
|
||||
counter_list[ind]->count = facet_count;
|
||||
if(ind > 1) {
|
||||
auto curr = ind;
|
||||
while (curr && (counter_list[curr-1]->count < counter_list[curr]->count)) {
|
||||
std::swap(counter_list[curr-1], counter_list[curr]);
|
||||
facet_index_map.at(counter_list[curr-1]->facet_value).count_list_index = curr-1;
|
||||
facet_index_map.at(counter_list[curr]->facet_value).count_list_index = curr;
|
||||
--curr;
|
||||
}
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
LOG(ERROR) << "Wrong count_index stored for facet " << sv << " with index " << index;
|
||||
}
|
||||
}
|
||||
if(ind == counter_list.size()) {
|
||||
// LOG (INFO) << "inserting at last facet " << node.facet_value
|
||||
// << " with count " << node.count;
|
||||
count_list* node = new count_list(sv, facet_count, index);
|
||||
counter_list.emplace_back(node);
|
||||
}
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
bool facet_index_t::contains(const std::string& field) {
|
||||
|
@ -328,7 +328,7 @@ void Index::compute_token_offsets_facets(index_record& record,
|
||||
offsets);
|
||||
std::string val = document[field_name];
|
||||
if(!val.empty()) {
|
||||
facet_hashes[val].push_back(record.seq_id);
|
||||
facet_hashes[val].emplace_back(record.seq_id);
|
||||
}
|
||||
} else {
|
||||
tokenize_string_array(document[field_name], is_facet, the_field,
|
||||
@ -603,9 +603,12 @@ size_t Index::batch_memory_index(Index *index, std::vector<index_record>& iter_b
|
||||
found_fields.insert(kv.key());
|
||||
}
|
||||
|
||||
for(const auto& kv : index_rec.facet_hashes) {
|
||||
std::copy(kv.second.begin(), kv.second.end(),
|
||||
std::back_inserter(facet_hashes[kv.first]));
|
||||
for(auto& kv : index_rec.facet_hashes) {
|
||||
auto end = facet_hashes[kv.first].end();
|
||||
|
||||
facet_hashes[kv.first].insert(end, std::make_move_iterator(kv.second.begin()),
|
||||
std::make_move_iterator(kv.second.end()));
|
||||
kv.second.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@ -758,7 +761,7 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string value;
|
||||
std::string value="";
|
||||
uint32_t fhash = 0;
|
||||
|
||||
if(afield.facet) {
|
||||
@ -771,19 +774,10 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
|
||||
} else if(afield.type == field_types::INT64_ARRAY) {
|
||||
int64_t raw_val = document[afield.name][i].get<int64_t>();
|
||||
value = std::to_string(raw_val);
|
||||
auto it = count_index_map.find(value);
|
||||
if(it == count_index_map.end()) {
|
||||
count_index_map.emplace(value, ++count_index);
|
||||
}
|
||||
fhash = count_index_map.at(value);
|
||||
fhash = facet_index_v4->insert(afield.name, value, facet_hashes[value]);
|
||||
} else if(afield.type == field_types::STRING_ARRAY) {
|
||||
value = document[afield.name][i];
|
||||
auto it = count_index_map.find(value);
|
||||
if(it == count_index_map.end()) {
|
||||
count_index_map.emplace(value, ++count_index);
|
||||
}
|
||||
fhash = count_index_map.at(value);
|
||||
facet_index_v4->insert(afield.name, value, facet_hashes[value], fhash);
|
||||
fhash = facet_index_v4->insert(afield.name, value, facet_hashes[value], true);
|
||||
} else if(afield.type == field_types::FLOAT_ARRAY) {
|
||||
float raw_val = document[afield.name][i].get<float>();
|
||||
fhash = reinterpret_cast<uint32_t&>(raw_val);
|
||||
@ -797,9 +791,11 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
|
||||
}
|
||||
}
|
||||
|
||||
if(facet_index && facet_threshold_count > FACET_INDEX_THRESHOLD) {
|
||||
facet_index->upsert(seq_id, std::move(fhashvalues));
|
||||
fhashvalues.clear();
|
||||
if(facet_index!=nullptr) {
|
||||
if (facet_threshold_count > FACET_INDEX_THRESHOLD) {
|
||||
facet_index->upsert(seq_id, std::move(fhashvalues));
|
||||
fhashvalues.clear();
|
||||
}
|
||||
} else {
|
||||
LOG(ERROR) << "facet_index was null while inserting for facet " << afield.name;
|
||||
}
|
||||
@ -813,20 +809,11 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
|
||||
else if(afield.type == field_types::INT64) {
|
||||
int64_t raw_val = document[afield.name].get<int64_t>();
|
||||
value = std::to_string(raw_val);
|
||||
auto it = count_index_map.find(value);
|
||||
if(it == count_index_map.end()) {
|
||||
count_index_map.emplace(value, ++count_index);
|
||||
}
|
||||
fhash = count_index_map.at(value);
|
||||
fhash = facet_index_v4->insert(afield.name, value, facet_hashes[value]);
|
||||
}
|
||||
else if(afield.type == field_types::STRING) {
|
||||
value = document[afield.name];
|
||||
auto it = count_index_map.find(value);
|
||||
if(it == count_index_map.end()) {
|
||||
count_index_map.emplace(value, ++count_index);
|
||||
}
|
||||
fhash = count_index_map.at(value);
|
||||
facet_index_v4->insert(afield.name, value, facet_hashes[value], fhash);
|
||||
fhash = facet_index_v4->insert(afield.name, value, facet_hashes[value], true);
|
||||
}
|
||||
else if(afield.type == field_types::FLOAT) {
|
||||
float raw_val = document[afield.name].get<float>();
|
||||
@ -837,8 +824,10 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
|
||||
fhash = (uint32_t)raw_val;
|
||||
}
|
||||
|
||||
if(facet_index && facet_threshold_count > FACET_INDEX_THRESHOLD) {
|
||||
facet_index->upsert(seq_id, {fhash});
|
||||
if(facet_index!=nullptr) {
|
||||
if (facet_threshold_count > FACET_INDEX_THRESHOLD) {
|
||||
facet_index->upsert(seq_id, {fhash});
|
||||
}
|
||||
} else {
|
||||
LOG(ERROR) << "facet_index was null while inserting for facet " << afield.name;
|
||||
}
|
||||
@ -1213,7 +1202,7 @@ void Index::tokenize_string_array(const std::vector<std::string>& strings, bool
|
||||
}
|
||||
void Index::initialize_facet_indexes(const field& facet_field) {
|
||||
|
||||
if(facet_field.is_string()) {
|
||||
if(facet_field.is_string() || facet_field.is_int64()) {
|
||||
facet_index_v4->initialize(facet_field.name);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user