mirror of
https://github.com/typesense/typesense.git
synced 2025-05-18 12:42:50 +08:00
Fix posting list deletion edge case.
This commit is contained in:
parent
d9b969aab5
commit
6ded5c7556
@ -186,7 +186,9 @@ bool posting_list_t::block_t::contains(uint32_t id) {
|
||||
/* posting_list_t operations */
|
||||
|
||||
posting_list_t::posting_list_t(uint16_t max_block_elements): BLOCK_MAX_ELEMENTS(max_block_elements) {
|
||||
|
||||
if(max_block_elements <= 1) {
|
||||
throw std::invalid_argument("max_block_elements must be > 1");
|
||||
}
|
||||
}
|
||||
|
||||
posting_list_t::~posting_list_t() {
|
||||
@ -423,7 +425,7 @@ void posting_list_t::upsert(const uint32_t id, const std::vector<uint32_t>& offs
|
||||
} else {
|
||||
const auto it = id_block_map.lower_bound(id);
|
||||
upsert_block = (it == id_block_map.end()) ? id_block_map.rbegin()->second : it->second;
|
||||
before_upsert_last_id = upsert_block->ids.at(upsert_block->size() - 1);
|
||||
before_upsert_last_id = upsert_block->ids.last();
|
||||
}
|
||||
|
||||
// happy path: upsert_block is not full
|
||||
@ -431,7 +433,7 @@ void posting_list_t::upsert(const uint32_t id, const std::vector<uint32_t>& offs
|
||||
uint32_t num_inserted = upsert_block->upsert(id, offsets);
|
||||
ids_length += num_inserted;
|
||||
|
||||
last_id_t after_upsert_last_id = upsert_block->ids.at(upsert_block->size() - 1);
|
||||
last_id_t after_upsert_last_id = upsert_block->ids.last();
|
||||
if(before_upsert_last_id != after_upsert_last_id) {
|
||||
id_block_map.erase(before_upsert_last_id);
|
||||
id_block_map.emplace(after_upsert_last_id, upsert_block);
|
||||
@ -451,12 +453,12 @@ void posting_list_t::upsert(const uint32_t id, const std::vector<uint32_t>& offs
|
||||
// evenly divide elements between both blocks
|
||||
split_block(upsert_block, new_block);
|
||||
|
||||
last_id_t after_upsert_last_id = upsert_block->ids.at(upsert_block->size() - 1);
|
||||
last_id_t after_upsert_last_id = upsert_block->ids.last();
|
||||
id_block_map.erase(before_upsert_last_id);
|
||||
id_block_map.emplace(after_upsert_last_id, upsert_block);
|
||||
}
|
||||
|
||||
last_id_t after_new_block_id = new_block->ids.at(new_block->size() - 1);
|
||||
last_id_t after_new_block_id = new_block->ids.last();
|
||||
id_block_map.emplace(after_new_block_id, new_block);
|
||||
|
||||
new_block->next = upsert_block->next;
|
||||
@ -485,6 +487,17 @@ void posting_list_t::erase(const uint32_t id) {
|
||||
// since we will be deleting the empty node, set the previous node's next pointer to null
|
||||
std::prev(it)->second->next = nullptr;
|
||||
delete erase_block;
|
||||
} else {
|
||||
// The root block cannot be empty if there are other blocks so we will pull some contents from next block
|
||||
// This is only an issue for blocks with max size of 2
|
||||
if(root_block.next != nullptr) {
|
||||
auto next_block_last_id = erase_block->next->ids.last();
|
||||
merge_adjacent_blocks(erase_block, erase_block->next, erase_block->next->size()/2);
|
||||
id_block_map.erase(next_block_last_id);
|
||||
|
||||
id_block_map.emplace(erase_block->next->ids.last(), erase_block->next);
|
||||
id_block_map.emplace(erase_block->ids.last(), erase_block);
|
||||
}
|
||||
}
|
||||
|
||||
id_block_map.erase(before_last_id);
|
||||
@ -493,7 +506,7 @@ void posting_list_t::erase(const uint32_t id) {
|
||||
}
|
||||
|
||||
if(new_ids_length >= BLOCK_MAX_ELEMENTS/2 || erase_block->next == nullptr) {
|
||||
last_id_t after_last_id = erase_block->ids.at(new_ids_length-1);
|
||||
last_id_t after_last_id = erase_block->ids.last();
|
||||
if(before_last_id != after_last_id) {
|
||||
id_block_map.erase(before_last_id);
|
||||
id_block_map.emplace(after_last_id, erase_block);
|
||||
@ -505,7 +518,7 @@ void posting_list_t::erase(const uint32_t id) {
|
||||
// block is less than 50% of max capacity and contains a next node which we can refill from
|
||||
|
||||
auto next_block = erase_block->next;
|
||||
last_id_t next_block_last_id = next_block->ids.at(next_block->ids.getLength()-1);
|
||||
last_id_t next_block_last_id = next_block->ids.last();
|
||||
|
||||
if(erase_block->size() + next_block->size() <= BLOCK_MAX_ELEMENTS) {
|
||||
// we can merge the contents of next block with `erase_block` and delete the next block
|
||||
@ -515,13 +528,15 @@ void posting_list_t::erase(const uint32_t id) {
|
||||
|
||||
id_block_map.erase(next_block_last_id);
|
||||
} else {
|
||||
// only part of the next block can be moved over
|
||||
size_t num_block2_ids = BLOCK_MAX_ELEMENTS - erase_block->size();
|
||||
// Only part of the next block can be moved over.
|
||||
// We will move only 50% of max elements to ensure that we don't end up "flipping" adjacent blocks:
|
||||
// 1, 5 -> 5, 1
|
||||
size_t num_block2_ids = BLOCK_MAX_ELEMENTS/2;
|
||||
merge_adjacent_blocks(erase_block, next_block, num_block2_ids);
|
||||
// NOTE: we don't have to update `id_block_map` for `next_block` as last element doesn't change
|
||||
}
|
||||
|
||||
last_id_t after_last_id = erase_block->ids.at(erase_block->ids.getLength()-1);
|
||||
last_id_t after_last_id = erase_block->ids.last();
|
||||
if(before_last_id != after_last_id) {
|
||||
id_block_map.erase(before_last_id);
|
||||
id_block_map.emplace(after_last_id, erase_block);
|
||||
|
@ -447,13 +447,13 @@ TEST_F(PostingListTest, RemovalsOnLaterBlocks) {
|
||||
// only part of the next node contents can be moved over when we delete 8 since (1 + 5) > 5
|
||||
pl.erase(8);
|
||||
|
||||
// [0..4], [9], [10..14] => [0..4], [9,10,11,12,13], [14]
|
||||
// [0..4], [9], [10..14] => [0..4], [9,10,11], [12,13,14]
|
||||
|
||||
ASSERT_EQ(3, pl.num_blocks());
|
||||
ASSERT_EQ(11, pl.num_ids());
|
||||
ASSERT_EQ(5, pl.get_root()->next->size());
|
||||
ASSERT_EQ(1, pl.get_root()->next->next->size());
|
||||
ASSERT_EQ(13, pl.get_root()->next->ids.last());
|
||||
ASSERT_EQ(3, pl.get_root()->next->size());
|
||||
ASSERT_EQ(3, pl.get_root()->next->next->size());
|
||||
ASSERT_EQ(11, pl.get_root()->next->ids.last());
|
||||
ASSERT_EQ(14, pl.get_root()->next->next->ids.last());
|
||||
|
||||
for(size_t i = 0; i < pl.get_root()->next->offset_index.getLength(); i++) {
|
||||
@ -616,27 +616,6 @@ TEST_F(PostingListTest, SplittingOfListsSimple) {
|
||||
std::vector<std::vector<posting_list_t::iterator_t>> partial_its_vec(4);
|
||||
intersector.split_lists(4, partial_its_vec);
|
||||
|
||||
/*for(size_t i = 0; i < partial_its_vec.size(); i++) {
|
||||
auto& partial_its = partial_its_vec[i];
|
||||
|
||||
if (partial_its.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
LOG(INFO) << "Vec " << i;
|
||||
|
||||
for (auto& it: partial_its) {
|
||||
while (it.valid()) {
|
||||
LOG(INFO) << it.id();
|
||||
it.next();
|
||||
}
|
||||
|
||||
LOG(INFO) << "---";
|
||||
}
|
||||
}
|
||||
|
||||
return ;*/
|
||||
|
||||
std::vector<std::vector<std::vector<uint32_t>>> split_ids = {
|
||||
{{0, 2}, {1, 3}, {2, 3}},
|
||||
{{3, 20}, {1, 3, 5, 10, 20}, {2, 3, 5, 7, 20}}
|
||||
@ -1384,6 +1363,94 @@ TEST_F(PostingListTest, BlockIntersectionOnMixedLists) {
|
||||
free(list1);
|
||||
}
|
||||
|
||||
TEST_F(PostingListTest, InsertAndEraseSequence) {
|
||||
std::vector<uint32_t> offsets = {0, 1, 3};
|
||||
posting_list_t pl(5);
|
||||
|
||||
pl.upsert(0, offsets);
|
||||
pl.upsert(2, offsets);
|
||||
pl.upsert(4, offsets);
|
||||
pl.upsert(6, offsets);
|
||||
pl.upsert(8, offsets);
|
||||
|
||||
// this will cause a split of the root block
|
||||
pl.upsert(3, offsets); // 0,2,3 | 4,6,8
|
||||
pl.erase(0); // 2,3 | 4,6,8
|
||||
pl.upsert(5, offsets); // 2,3 | 4,5,6,8
|
||||
pl.upsert(7, offsets); // 2,3 | 4,5,6,7,8
|
||||
pl.upsert(10, offsets); // 2,3 | 4,5,6,7,8 | 10
|
||||
|
||||
// this will cause adjacent block refill
|
||||
pl.erase(2); // 3,4,5,6,7 | 8 | 10
|
||||
|
||||
// deletes second block
|
||||
pl.erase(8);
|
||||
|
||||
// remove all elements
|
||||
pl.erase(3);
|
||||
pl.erase(4);
|
||||
pl.erase(5);
|
||||
pl.erase(6);
|
||||
pl.erase(7);
|
||||
pl.erase(10);
|
||||
|
||||
ASSERT_EQ(0, pl.num_ids());
|
||||
}
|
||||
|
||||
TEST_F(PostingListTest, InsertAndEraseSequenceWithBlockSizeTwo) {
|
||||
std::vector<uint32_t> offsets = {0, 1, 3};
|
||||
posting_list_t pl(2);
|
||||
|
||||
pl.upsert(2, offsets);
|
||||
pl.upsert(3, offsets);
|
||||
pl.upsert(1, offsets); // inserting 2 again here? // inserting 4 here?
|
||||
|
||||
// 1 | 2,3
|
||||
|
||||
pl.erase(1);
|
||||
|
||||
ASSERT_EQ(1, pl.get_root()->size());
|
||||
ASSERT_EQ(2, pl.num_blocks());
|
||||
|
||||
pl.erase(3);
|
||||
pl.erase(2);
|
||||
|
||||
ASSERT_EQ(0, pl.get_root()->size());
|
||||
}
|
||||
|
||||
TEST_F(PostingListTest, PostingListMustHaveAtleast1Element) {
|
||||
try {
|
||||
std::vector<uint32_t> offsets = {0, 1, 3};
|
||||
posting_list_t pl(1);
|
||||
FAIL() << "Expected std::invalid_argument";
|
||||
}
|
||||
catch(std::invalid_argument const & err) {
|
||||
EXPECT_EQ(err.what(),std::string("max_block_elements must be > 1"));
|
||||
} catch(...) {
|
||||
FAIL() << "Expected std::invalid_argument";
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(PostingListTest, DISABLED_RandInsertAndErase) {
|
||||
std::vector<uint32_t> offsets = {0, 1, 3};
|
||||
posting_list_t pl(5);
|
||||
|
||||
time_t t;
|
||||
srand((unsigned) time(&t));
|
||||
|
||||
for(size_t i = 0; i < 10000; i++) {
|
||||
LOG(INFO) << "i: " << i;
|
||||
uint32_t add_id = rand() % 15;
|
||||
pl.upsert(add_id, offsets);
|
||||
|
||||
uint32_t del_id = rand() % 15;
|
||||
LOG(INFO) << "add: " << add_id << ", erase: " << del_id;
|
||||
pl.erase(del_id);
|
||||
}
|
||||
|
||||
LOG(INFO) << "Num ids: " << pl.num_ids() << ", num bocks: " << pl.num_blocks();
|
||||
}
|
||||
|
||||
TEST_F(PostingListTest, DISABLED_Benchmark) {
|
||||
std::vector<uint32_t> offsets = {0, 1, 3};
|
||||
posting_list_t pl(4096);
|
||||
|
Loading…
x
Reference in New Issue
Block a user