mirror of
https://github.com/typesense/typesense.git
synced 2025-05-21 06:02:26 +08:00
Fix an edge case in string update.
This commit is contained in:
parent
2534b1f798
commit
13622ff038
@ -33,6 +33,8 @@ public:
|
||||
|
||||
void remove_and_shift_offset_index(const uint32_t* indices_sorted, uint32_t num_indices);
|
||||
|
||||
void insert_and_shift_offset_index(const uint32_t index, const uint32_t num_offsets);
|
||||
|
||||
uint32_t upsert(uint32_t id, const std::vector<uint32_t>& offsets);
|
||||
|
||||
uint32_t erase(uint32_t id);
|
||||
@ -61,6 +63,7 @@ public:
|
||||
[[nodiscard]] bool valid() const;
|
||||
void next();
|
||||
void skip_to(uint32_t id);
|
||||
void set_index(uint32_t index);
|
||||
[[nodiscard]] uint32_t id() const;
|
||||
[[nodiscard]] inline uint32_t index() const;
|
||||
[[nodiscard]] inline block_t* block() const;
|
||||
@ -129,6 +132,8 @@ public:
|
||||
|
||||
void erase(uint32_t id);
|
||||
|
||||
void dump();
|
||||
|
||||
block_t* get_root();
|
||||
|
||||
size_t num_blocks() const;
|
||||
|
@ -6,11 +6,29 @@
|
||||
/* block_t operations */
|
||||
|
||||
uint32_t posting_list_t::block_t::upsert(const uint32_t id, const std::vector<uint32_t>& positions) {
|
||||
if(id <= ids.last() && ids.getLength() != 0) {
|
||||
// we have to check if `id` already exists, for an opportunity to do in-place updates
|
||||
if(id > ids.last() || ids.getLength() == 0) {
|
||||
// append to the end
|
||||
ids.append(id);
|
||||
uint32_t curr_index = offsets.getLength();
|
||||
offset_index.append(curr_index);
|
||||
for(uint32_t position : positions) {
|
||||
offsets.append(position);
|
||||
}
|
||||
}
|
||||
|
||||
else {
|
||||
// we have to check if `id` already exists, and do in-place update/insert
|
||||
uint32_t id_index = ids.indexOf(id);
|
||||
|
||||
if(id_index != ids.getLength()) {
|
||||
if(id_index == ids.getLength()) {
|
||||
// id not found, we have to insert it
|
||||
size_t inserted_index = ids.append(id);
|
||||
uint32_t existing_offset_index = offset_index.at(inserted_index);
|
||||
insert_and_shift_offset_index(inserted_index, positions.size());
|
||||
offsets.insert(existing_offset_index, &positions[0], positions.size());
|
||||
}
|
||||
|
||||
else {
|
||||
// id is already present, so we will only update offset index and offsets
|
||||
uint32_t start_offset_index = offset_index.at(id_index);
|
||||
uint32_t end_offset_index = (id == ids.last()) ? offsets.getLength()-1 : offset_index.at(id_index + 1)-1;
|
||||
@ -113,15 +131,6 @@ uint32_t posting_list_t::block_t::upsert(const uint32_t id, const std::vector<ui
|
||||
}
|
||||
}
|
||||
|
||||
// treat as regular append (either id not found or exceeds max id)
|
||||
|
||||
ids.append(id);
|
||||
uint32_t curr_index = offsets.getLength();
|
||||
offset_index.append(curr_index);
|
||||
for(uint32_t position : positions) {
|
||||
offsets.append(position);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -179,6 +188,26 @@ void posting_list_t::block_t::remove_and_shift_offset_index(const uint32_t* indi
|
||||
delete[] new_array;
|
||||
}
|
||||
|
||||
/**
 * Opens a slot at position `index` of `offset_index` and pushes every entry that
 * follows it forward by `num_offsets`.
 *
 * The new slot receives the value that was stored at `index` before the call, i.e.
 * the newly inserted id starts where the displaced id used to start. The offsets
 * themselves are not touched here.
 *
 * @param index        position in `offset_index` at which the new entry is placed
 * @param num_offsets  amount added to every entry located after `index`
 */
void posting_list_t::block_t::insert_and_shift_offset_index(const uint32_t index, const uint32_t num_offsets) {
    const uint32_t start_of_new_entry = offset_index.at(index);
    const uint32_t old_len = offset_index.getLength();
    const uint32_t grown_len = old_len + 1;

    // NOTE(review): presumably returns a heap buffer with room for `grown_len` entries -- confirm
    uint32_t* buffer = offset_index.uncompress(grown_len);

    // slide the tail [index, old_len) one slot to the right to open the gap
    memmove(buffer + index + 1, buffer + index, sizeof(uint32_t) * (old_len - index));
    buffer[index] = start_of_new_entry;

    // every entry after the new one now begins `num_offsets` positions later
    for(uint32_t pos = index + 1; pos < grown_len; pos++) {
        buffer[pos] += num_offsets;
    }

    offset_index.load(buffer, grown_len);

    delete [] buffer;
}
|
||||
|
||||
bool posting_list_t::block_t::contains(uint32_t id) {
|
||||
return ids.contains(id);
|
||||
}
|
||||
@ -467,6 +496,41 @@ void posting_list_t::upsert(const uint32_t id, const std::vector<uint32_t>& offs
|
||||
}
|
||||
}
|
||||
|
||||
void posting_list_t::dump() {
|
||||
auto it = new_iterator();
|
||||
|
||||
std::string ids_str;
|
||||
std::string offset_index_str;
|
||||
std::string offsets_str;
|
||||
|
||||
while(it.valid()) {
|
||||
auto index = it.index();
|
||||
while(index < it.block()->size()) {
|
||||
ids_str += std::to_string(it.ids[index]) + ", ";
|
||||
offset_index_str += std::to_string(it.offset_index[index]) + ", ";
|
||||
index++;
|
||||
}
|
||||
|
||||
auto last_offset_index = it.offset_index[it.block()->size()-1];
|
||||
|
||||
for(size_t j = 0; j <= last_offset_index; j++) {
|
||||
offsets_str += std::to_string(it.offsets[j]) + ", ";
|
||||
}
|
||||
|
||||
it.set_index(it.block()->size()-1);
|
||||
it.next();
|
||||
}
|
||||
|
||||
LOG(INFO) << "ids_str:";
|
||||
LOG(INFO) << ids_str;
|
||||
|
||||
LOG(INFO) << "offset_index_str:";
|
||||
LOG(INFO) << offset_index_str;
|
||||
|
||||
LOG(INFO) << "offsets_str:";
|
||||
LOG(INFO) << offsets_str;
|
||||
}
|
||||
|
||||
void posting_list_t::erase(const uint32_t id) {
|
||||
const auto it = id_block_map.lower_bound(id);
|
||||
|
||||
@ -1354,3 +1418,7 @@ posting_list_t::iterator_t::iterator_t(iterator_t&& rhs) noexcept {
|
||||
rhs.offset_index = nullptr;
|
||||
rhs.offsets = nullptr;
|
||||
}
|
||||
|
||||
void posting_list_t::iterator_t::set_index(uint32_t index) {
|
||||
curr_index = index;
|
||||
}
|
||||
|
@ -129,6 +129,26 @@ TEST_F(PostingListTest, Insert) {
|
||||
}
|
||||
}
|
||||
|
||||
// Upserting an id that is smaller than the block's last id must place it between its
// neighbours and keep ids, offset_index and offsets aligned with each other.
TEST_F(PostingListTest, InsertInMiddle) {
    posting_list_t pl(3);

    // id 2 arrives last, after both of its neighbours are already present
    pl.upsert(1, {1});
    pl.upsert(3, {3});
    pl.upsert(2, {2});

    auto* root = pl.get_root();

    // ids come out sorted: 1, 2, 3
    for(uint32_t i = 0; i < 3; i++) {
        ASSERT_EQ(i + 1, root->ids.at(i));
    }

    // one offset per id, so the i-th id starts at position i
    for(uint32_t i = 0; i < 3; i++) {
        ASSERT_EQ(i, root->offset_index.at(i));
    }

    // each id was upserted with its own value as the only offset
    for(uint32_t i = 0; i < 3; i++) {
        ASSERT_EQ(i + 1, root->offsets.at(i));
    }
}
|
||||
|
||||
TEST_F(PostingListTest, InplaceUpserts) {
|
||||
std::vector<uint32_t> offsets = {1, 2, 3};
|
||||
posting_list_t pl(5);
|
||||
|
Loading…
x
Reference in New Issue
Block a user