mirror of
https://github.com/typesense/typesense.git
synced 2025-05-19 21:22:25 +08:00
Track num ids of posting lists.
This commit is contained in:
parent
e695ba65c8
commit
a7db88c27e
@ -10,8 +10,9 @@
|
||||
// Clears the lowest bit of `x` and yields the result as a compact_posting_list_t*.
// The argument is parenthesized so that an expression argument (e.g. a conditional)
// is converted as a whole, and the mask is built at uintptr_t width instead of being
// derived from an `int` literal.
#define COMPACT_POSTING_PTR(x) ((compact_posting_list_t*)((uintptr_t)(x) & ~(uintptr_t)1))
|
||||
|
||||
struct compact_posting_list_t {
|
||||
// use uint16_t to get 4 byte alignment for `id_offsets`
|
||||
uint16_t length = 0;
|
||||
// structured to get 4 byte alignment for `id_offsets`
|
||||
uint8_t length = 0;
|
||||
uint8_t ids_length = 0;
|
||||
uint16_t capacity = 0;
|
||||
|
||||
// format: num_offsets, offset1,..,offsetn, id1 | num_offsets, offset1,..,offsetn, id2
|
||||
@ -28,6 +29,8 @@ struct compact_posting_list_t {
|
||||
void erase(uint32_t id);
|
||||
|
||||
uint32_t last_id();
|
||||
|
||||
uint32_t num_ids() const;
|
||||
};
|
||||
|
||||
class posting_t {
|
||||
@ -40,4 +43,5 @@ public:
|
||||
|
||||
static void erase(void*& obj, uint32_t id);
|
||||
|
||||
static uint32_t num_ids(void*& obj);
|
||||
};
|
@ -30,9 +30,9 @@ public:
|
||||
|
||||
void remove_and_shift_offset_index(const uint32_t* indices_sorted, uint32_t num_indices);
|
||||
|
||||
void upsert(uint32_t id, const std::vector<uint32_t>& offsets);
|
||||
uint32_t upsert(uint32_t id, const std::vector<uint32_t>& offsets);
|
||||
|
||||
void erase(uint32_t id);
|
||||
uint32_t erase(uint32_t id);
|
||||
|
||||
uint32_t size() {
|
||||
return ids.getLength();
|
||||
@ -59,6 +59,7 @@ private:
|
||||
|
||||
// maximum number of IDs (and associated offsets) to store in each block before another block is created
|
||||
const uint16_t BLOCK_MAX_ELEMENTS;
|
||||
// Running count of IDs held by this posting list; adjusted by the per-block
// upsert/erase return values and reported by num_ids(). IDs are uint32_t values, so
// the counter is 32 bits wide: a 16-bit counter would wrap past 65535 IDs.
uint32_t ids_length = 0;
|
||||
|
||||
block_t root_block;
|
||||
|
||||
@ -97,7 +98,9 @@ public:
|
||||
|
||||
block_t* get_root();
|
||||
|
||||
size_t size();
|
||||
size_t num_blocks();
|
||||
|
||||
size_t num_ids();
|
||||
|
||||
block_t* block_of(last_id_t id);
|
||||
|
||||
|
@ -24,6 +24,7 @@ int64_t compact_posting_list_t::upsert(const uint32_t id, const uint32_t* offset
|
||||
}
|
||||
length += num_offsets;
|
||||
id_offsets[length++] = id;
|
||||
ids_length++;
|
||||
} else {
|
||||
// locate position and shift contents to make space available
|
||||
int64_t i = 0;
|
||||
@ -92,6 +93,7 @@ int64_t compact_posting_list_t::upsert(const uint32_t id, const uint32_t* offset
|
||||
|
||||
i += num_offsets;
|
||||
id_offsets[i++] = id;
|
||||
ids_length++;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -128,6 +130,8 @@ void compact_posting_list_t::erase(const uint32_t id) {
|
||||
|
||||
i += num_existing_offsets + 2;
|
||||
}
|
||||
|
||||
ids_length--;
|
||||
}
|
||||
|
||||
compact_posting_list_t* compact_posting_list_t::create(uint32_t num_ids, uint32_t* ids, const uint32_t* offset_index,
|
||||
@ -140,6 +144,7 @@ compact_posting_list_t* compact_posting_list_t::create(uint32_t num_ids, uint32_
|
||||
|
||||
pl->length = 0;
|
||||
pl->capacity = length_required;
|
||||
pl->ids_length = 0;
|
||||
|
||||
for(size_t i = 0; i < num_ids; i++) {
|
||||
uint32_t start_offset = offset_index[i];
|
||||
@ -175,6 +180,10 @@ uint32_t compact_posting_list_t::last_id() {
|
||||
return (length == 0) ? UINT32_MAX : id_offsets[length - 1];
|
||||
}
|
||||
|
||||
uint32_t compact_posting_list_t::num_ids() const {
|
||||
return ids_length;
|
||||
}
|
||||
|
||||
/* posting operations */
|
||||
|
||||
void posting_t::upsert(void*& obj, uint32_t id, const std::vector<uint32_t>& offsets) {
|
||||
@ -235,7 +244,7 @@ void posting_t::erase(void*& obj, uint32_t id) {
|
||||
} else {
|
||||
posting_list_t* list = (posting_list_t*) RAW_POSTING_PTR(obj);
|
||||
list->erase(id);
|
||||
if(list->size() == 1 && list->get_root()->size() <= 10) {
|
||||
if(list->num_blocks() == 1 && list->get_root()->size() <= 10) {
|
||||
// convert to compact posting format
|
||||
auto root_block = list->get_root();
|
||||
auto ids = root_block->ids.uncompress();
|
||||
@ -256,3 +265,12 @@ void posting_t::erase(void*& obj, uint32_t id) {
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t posting_t::num_ids(void*& obj) {
|
||||
if(IS_COMPACT_POSTING(obj)) {
|
||||
compact_posting_list_t* list = (compact_posting_list_t*) RAW_POSTING_PTR(obj);
|
||||
return list->num_ids();
|
||||
} else {
|
||||
posting_list_t* list = (posting_list_t*) RAW_POSTING_PTR(obj);
|
||||
return list->num_ids();
|
||||
}
|
||||
}
|
||||
|
@ -24,7 +24,7 @@ void posting_list_t::block_t::insert_and_shift_offset_index(const uint32_t index
|
||||
delete [] curr_array;
|
||||
}
|
||||
|
||||
void posting_list_t::block_t::upsert(const uint32_t id, const std::vector<uint32_t>& positions) {
|
||||
uint32_t posting_list_t::block_t::upsert(const uint32_t id, const std::vector<uint32_t>& positions) {
|
||||
if(id <= ids.last()) {
|
||||
// we have to check if `id` already exists, for an opportunity to do in-place updates
|
||||
uint32_t id_index = ids.indexOf(id);
|
||||
@ -128,7 +128,7 @@ void posting_list_t::block_t::upsert(const uint32_t id, const std::vector<uint32
|
||||
}
|
||||
|
||||
delete [] curr_offsets;
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -140,13 +140,15 @@ void posting_list_t::block_t::upsert(const uint32_t id, const std::vector<uint32
|
||||
for(uint32_t position : positions) {
|
||||
offsets.append(position);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
void posting_list_t::block_t::erase(const uint32_t id) {
|
||||
uint32_t posting_list_t::block_t::erase(const uint32_t id) {
|
||||
uint32_t doc_index = ids.indexOf(id);
|
||||
|
||||
if (doc_index == ids.getLength()) {
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t start_offset = offset_index.at(doc_index);
|
||||
@ -159,6 +161,8 @@ void posting_list_t::block_t::erase(const uint32_t id) {
|
||||
|
||||
offsets.remove_index(start_offset, end_offset);
|
||||
ids.remove_value(id);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
void posting_list_t::block_t::remove_and_shift_offset_index(const uint32_t* indices_sorted,
|
||||
@ -402,7 +406,9 @@ void posting_list_t::upsert(const uint32_t id, const std::vector<uint32_t>& offs
|
||||
|
||||
// happy path: upsert_block is not full
|
||||
if(upsert_block->size() < BLOCK_MAX_ELEMENTS) {
|
||||
upsert_block->upsert(id, offsets);
|
||||
uint32_t num_inserted = upsert_block->upsert(id, offsets);
|
||||
ids_length += num_inserted;
|
||||
|
||||
last_id_t after_upsert_last_id = upsert_block->ids.at(upsert_block->size() - 1);
|
||||
if(before_upsert_last_id != after_upsert_last_id) {
|
||||
id_block_map.erase(before_upsert_last_id);
|
||||
@ -413,10 +419,12 @@ void posting_list_t::upsert(const uint32_t id, const std::vector<uint32_t>& offs
|
||||
|
||||
if(upsert_block->next == nullptr && upsert_block->ids.last() < id) {
|
||||
// appending to the end of the last block: the id will reside in a newly created block
|
||||
new_block->upsert(id, offsets);
|
||||
uint32_t num_inserted = new_block->upsert(id, offsets);
|
||||
ids_length += num_inserted;
|
||||
} else {
|
||||
// upsert and then split block
|
||||
upsert_block->upsert(id, offsets);
|
||||
uint32_t num_inserted = upsert_block->upsert(id, offsets);
|
||||
ids_length += num_inserted;
|
||||
|
||||
// evenly divide elements between both blocks
|
||||
split_block(upsert_block, new_block);
|
||||
@ -443,7 +451,8 @@ void posting_list_t::erase(const uint32_t id) {
|
||||
|
||||
block_t* erase_block = it->second;
|
||||
last_id_t before_last_id = it->first;
|
||||
erase_block->erase(id);
|
||||
uint32_t num_erased = erase_block->erase(id);
|
||||
ids_length -= num_erased;
|
||||
|
||||
size_t new_ids_length = erase_block->size();
|
||||
|
||||
@ -501,7 +510,7 @@ posting_list_t::block_t* posting_list_t::get_root() {
|
||||
return &root_block;
|
||||
}
|
||||
|
||||
size_t posting_list_t::size() {
|
||||
size_t posting_list_t::num_blocks() {
|
||||
return id_block_map.size();
|
||||
}
|
||||
|
||||
@ -622,6 +631,10 @@ void posting_list_t::advance_least2(std::vector<posting_list_t::iterator_t>& its
|
||||
}
|
||||
}
|
||||
|
||||
size_t posting_list_t::num_ids() {
|
||||
return ids_length;
|
||||
}
|
||||
|
||||
/* iterator_t operations */
|
||||
|
||||
posting_list_t::iterator_t::iterator_t(posting_list_t::block_t* root): block(root), index(0) {
|
||||
|
@ -22,7 +22,8 @@ TEST(PostingListTest, Insert) {
|
||||
|
||||
ASSERT_EQ(root->next->next->next, nullptr);
|
||||
|
||||
ASSERT_EQ(3, pl.size());
|
||||
ASSERT_EQ(3, pl.num_blocks());
|
||||
ASSERT_EQ(15, pl.num_ids());
|
||||
ASSERT_EQ(root, pl.block_of(4));
|
||||
ASSERT_EQ(root->next, pl.block_of(9));
|
||||
ASSERT_EQ(root->next->next, pl.block_of(14));
|
||||
@ -41,7 +42,8 @@ TEST(PostingListTest, Insert) {
|
||||
ASSERT_EQ(3, root->next->ids.getLength());
|
||||
|
||||
ASSERT_EQ(root->next->next, nullptr);
|
||||
ASSERT_EQ(2, pl2.size());
|
||||
ASSERT_EQ(2, pl2.num_blocks());
|
||||
ASSERT_EQ(8, pl2.num_ids());
|
||||
|
||||
ASSERT_EQ(root, pl2.block_of(8));
|
||||
ASSERT_EQ(root->next, pl2.block_of(14));
|
||||
@ -60,10 +62,12 @@ TEST(PostingListTest, Insert) {
|
||||
pl3.upsert(9, offsets);
|
||||
pl3.upsert(10, offsets);
|
||||
pl3.upsert(12, offsets);
|
||||
ASSERT_EQ(10, pl3.num_ids());
|
||||
|
||||
// [0,1,2,3,4], [6,8,9,10,12]
|
||||
pl3.upsert(5, offsets);
|
||||
ASSERT_EQ(3, pl3.size());
|
||||
ASSERT_EQ(3, pl3.num_blocks());
|
||||
ASSERT_EQ(11, pl3.num_ids());
|
||||
ASSERT_EQ(5, pl3.get_root()->ids.getLength());
|
||||
ASSERT_EQ(3, pl3.get_root()->next->ids.getLength());
|
||||
ASSERT_EQ(8, pl3.get_root()->next->ids.last());
|
||||
@ -93,7 +97,8 @@ TEST(PostingListTest, Insert) {
|
||||
|
||||
// [0,1,2,3,4], [6,8,9,10,12]
|
||||
pl4.upsert(11, offsets);
|
||||
ASSERT_EQ(3, pl4.size());
|
||||
ASSERT_EQ(3, pl4.num_blocks());
|
||||
ASSERT_EQ(11, pl4.num_ids());
|
||||
|
||||
ASSERT_EQ(5, pl4.get_root()->ids.getLength());
|
||||
ASSERT_EQ(3, pl4.get_root()->next->ids.getLength());
|
||||
@ -118,13 +123,15 @@ TEST(PostingListTest, InplaceUpserts) {
|
||||
pl.upsert(5, offsets);
|
||||
pl.upsert(7, offsets);
|
||||
|
||||
ASSERT_EQ(1, pl.size());
|
||||
ASSERT_EQ(1, pl.num_blocks());
|
||||
ASSERT_EQ(3, pl.num_ids());
|
||||
ASSERT_EQ(3, pl.get_root()->ids.getLength());
|
||||
ASSERT_EQ(9, pl.get_root()->offsets.getLength());
|
||||
|
||||
// update starting ID with same length of offsets
|
||||
pl.upsert(2, {1, 2, 4});
|
||||
ASSERT_EQ(1, pl.size());
|
||||
ASSERT_EQ(1, pl.num_blocks());
|
||||
ASSERT_EQ(3, pl.num_ids());
|
||||
ASSERT_EQ(3, pl.get_root()->ids.getLength());
|
||||
ASSERT_EQ(9, pl.get_root()->offsets.getLength());
|
||||
|
||||
@ -140,7 +147,8 @@ TEST(PostingListTest, InplaceUpserts) {
|
||||
|
||||
// update starting ID with smaller number of offsets
|
||||
pl.upsert(2, {5, 7});
|
||||
ASSERT_EQ(1, pl.size());
|
||||
ASSERT_EQ(1, pl.num_blocks());
|
||||
ASSERT_EQ(3, pl.num_ids());
|
||||
ASSERT_EQ(3, pl.get_root()->ids.getLength());
|
||||
ASSERT_EQ(8, pl.get_root()->offsets.getLength());
|
||||
|
||||
@ -156,7 +164,8 @@ TEST(PostingListTest, InplaceUpserts) {
|
||||
|
||||
// update starting ID with larger number of offsets
|
||||
pl.upsert(2, {0, 2, 8});
|
||||
ASSERT_EQ(1, pl.size());
|
||||
ASSERT_EQ(3, pl.num_ids());
|
||||
ASSERT_EQ(1, pl.num_blocks());
|
||||
ASSERT_EQ(3, pl.get_root()->ids.getLength());
|
||||
ASSERT_EQ(9, pl.get_root()->offsets.getLength());
|
||||
|
||||
@ -173,7 +182,8 @@ TEST(PostingListTest, InplaceUpserts) {
|
||||
|
||||
// update middle ID with smaller number of offsets
|
||||
pl.upsert(5, {1, 10});
|
||||
ASSERT_EQ(1, pl.size());
|
||||
ASSERT_EQ(3, pl.num_ids());
|
||||
ASSERT_EQ(1, pl.num_blocks());
|
||||
ASSERT_EQ(3, pl.get_root()->ids.getLength());
|
||||
ASSERT_EQ(8, pl.get_root()->offsets.getLength());
|
||||
|
||||
@ -192,7 +202,8 @@ TEST(PostingListTest, InplaceUpserts) {
|
||||
|
||||
// update middle ID with larger number of offsets
|
||||
pl.upsert(5, {2, 4, 12});
|
||||
ASSERT_EQ(1, pl.size());
|
||||
ASSERT_EQ(1, pl.num_blocks());
|
||||
ASSERT_EQ(3, pl.num_ids());
|
||||
ASSERT_EQ(3, pl.get_root()->ids.getLength());
|
||||
ASSERT_EQ(9, pl.get_root()->offsets.getLength());
|
||||
|
||||
@ -216,7 +227,8 @@ TEST(PostingListTest, InplaceUpserts) {
|
||||
// update last ID with smaller number of offsets
|
||||
|
||||
pl.upsert(7, {3});
|
||||
ASSERT_EQ(1, pl.size());
|
||||
ASSERT_EQ(1, pl.num_blocks());
|
||||
ASSERT_EQ(3, pl.num_ids());
|
||||
ASSERT_EQ(3, pl.get_root()->ids.getLength());
|
||||
ASSERT_EQ(7, pl.get_root()->offsets.getLength());
|
||||
|
||||
@ -238,7 +250,8 @@ TEST(PostingListTest, InplaceUpserts) {
|
||||
// update last ID with larger number of offsets
|
||||
|
||||
pl.upsert(7, {5, 20});
|
||||
ASSERT_EQ(1, pl.size());
|
||||
ASSERT_EQ(1, pl.num_blocks());
|
||||
ASSERT_EQ(3, pl.num_ids());
|
||||
ASSERT_EQ(3, pl.get_root()->ids.getLength());
|
||||
ASSERT_EQ(8, pl.get_root()->offsets.getLength());
|
||||
|
||||
@ -263,18 +276,22 @@ TEST(PostingListTest, RemovalsOnFirstBlock) {
|
||||
std::vector<uint32_t> offsets = {0, 1, 3};
|
||||
posting_list_t pl(5);
|
||||
|
||||
ASSERT_EQ(0, pl.size());
|
||||
ASSERT_EQ(0, pl.num_blocks());
|
||||
ASSERT_EQ(0, pl.num_ids());
|
||||
|
||||
// try to erase when posting list is empty
|
||||
pl.erase(0);
|
||||
|
||||
ASSERT_EQ(0, pl.size());
|
||||
ASSERT_EQ(0, pl.num_ids());
|
||||
ASSERT_EQ(0, pl.num_blocks());
|
||||
|
||||
// insert a single element and erase it
|
||||
pl.upsert(0, offsets);
|
||||
ASSERT_EQ(1, pl.size());
|
||||
ASSERT_EQ(1, pl.num_blocks());
|
||||
ASSERT_EQ(1, pl.num_ids());
|
||||
pl.erase(0);
|
||||
ASSERT_EQ(0, pl.size());
|
||||
ASSERT_EQ(0, pl.num_blocks());
|
||||
ASSERT_EQ(0, pl.num_ids());
|
||||
|
||||
ASSERT_EQ(0, pl.get_root()->ids.getLength());
|
||||
ASSERT_EQ(0, pl.get_root()->offset_index.getLength());
|
||||
@ -285,14 +302,17 @@ TEST(PostingListTest, RemovalsOnFirstBlock) {
|
||||
pl.upsert(i, offsets);
|
||||
}
|
||||
|
||||
ASSERT_EQ(2, pl.size());
|
||||
ASSERT_EQ(2, pl.num_blocks());
|
||||
ASSERT_EQ(6, pl.num_ids());
|
||||
|
||||
// delete non-existing element
|
||||
pl.erase(1000);
|
||||
ASSERT_EQ(6, pl.num_ids());
|
||||
|
||||
// delete elements from first block: blocks should not be merged until it falls below 50% occupancy
|
||||
pl.erase(1);
|
||||
ASSERT_EQ(2, pl.size());
|
||||
ASSERT_EQ(2, pl.num_blocks());
|
||||
ASSERT_EQ(5, pl.num_ids());
|
||||
|
||||
// [0, 2, 3, 4], [5]
|
||||
|
||||
@ -305,11 +325,12 @@ TEST(PostingListTest, RemovalsOnFirstBlock) {
|
||||
}
|
||||
|
||||
pl.erase(2);
|
||||
ASSERT_EQ(2, pl.size());
|
||||
ASSERT_EQ(2, pl.num_blocks());
|
||||
pl.erase(3);
|
||||
ASSERT_EQ(3, pl.num_ids());
|
||||
|
||||
// [0, 4], [5]
|
||||
ASSERT_EQ(2, pl.size());
|
||||
ASSERT_EQ(2, pl.num_blocks());
|
||||
ASSERT_EQ(2, pl.get_root()->size());
|
||||
ASSERT_EQ(1, pl.get_root()->next->size());
|
||||
ASSERT_EQ(pl.get_root(), pl.block_of(4));
|
||||
@ -327,7 +348,8 @@ TEST(PostingListTest, RemovalsOnFirstBlock) {
|
||||
|
||||
// [0, 5]
|
||||
// ensure that merge has happened
|
||||
ASSERT_EQ(1, pl.size());
|
||||
ASSERT_EQ(2, pl.num_ids());
|
||||
ASSERT_EQ(1, pl.num_blocks());
|
||||
ASSERT_EQ(pl.get_root(), pl.block_of(5));
|
||||
ASSERT_EQ(nullptr, pl.get_root()->next);
|
||||
ASSERT_EQ(2, pl.get_root()->size());
|
||||
@ -353,7 +375,8 @@ TEST(PostingListTest, RemovalsOnLaterBlocks) {
|
||||
// erase last element of last, non-first block
|
||||
|
||||
pl.erase(5);
|
||||
ASSERT_EQ(1, pl.size());
|
||||
ASSERT_EQ(5, pl.num_ids());
|
||||
ASSERT_EQ(1, pl.num_blocks());
|
||||
ASSERT_EQ(5, pl.get_root()->size());
|
||||
ASSERT_EQ(4, pl.get_root()->ids.last());
|
||||
ASSERT_EQ(nullptr, pl.get_root()->next);
|
||||
@ -368,7 +391,8 @@ TEST(PostingListTest, RemovalsOnLaterBlocks) {
|
||||
|
||||
// erase last element of the only block when block is at least half full
|
||||
pl.erase(4);
|
||||
ASSERT_EQ(1, pl.size());
|
||||
ASSERT_EQ(4, pl.num_ids());
|
||||
ASSERT_EQ(1, pl.num_blocks());
|
||||
ASSERT_EQ(4, pl.get_root()->size());
|
||||
ASSERT_EQ(3, pl.get_root()->ids.last());
|
||||
ASSERT_EQ(pl.get_root(), pl.block_of(3));
|
||||
@ -382,6 +406,8 @@ TEST(PostingListTest, RemovalsOnLaterBlocks) {
|
||||
pl.erase(6);
|
||||
pl.erase(7);
|
||||
|
||||
ASSERT_EQ(12, pl.num_ids());
|
||||
|
||||
for(size_t i = 0; i < pl.get_root()->next->offset_index.getLength(); i++) {
|
||||
ASSERT_EQ(i * 3, pl.get_root()->next->offset_index.at(i));
|
||||
}
|
||||
@ -403,7 +429,8 @@ TEST(PostingListTest, RemovalsOnLaterBlocks) {
|
||||
|
||||
// [0..4], [9], [10..14] => [0..4], [9,10,11,12,13], [14]
|
||||
|
||||
ASSERT_EQ(3, pl.size());
|
||||
ASSERT_EQ(3, pl.num_blocks());
|
||||
ASSERT_EQ(11, pl.num_ids());
|
||||
ASSERT_EQ(5, pl.get_root()->next->size());
|
||||
ASSERT_EQ(1, pl.get_root()->next->next->size());
|
||||
ASSERT_EQ(13, pl.get_root()->next->ids.last());
|
||||
@ -437,7 +464,7 @@ TEST(PostingListTest, OutOfOrderUpserts) {
|
||||
pl.upsert(0, offsets);
|
||||
pl.upsert(200000, offsets);
|
||||
|
||||
ASSERT_EQ(2, pl.size());
|
||||
ASSERT_EQ(2, pl.num_blocks());
|
||||
|
||||
ASSERT_EQ(3, pl.get_root()->size());
|
||||
ASSERT_EQ(4, pl.get_root()->next->size());
|
||||
@ -485,8 +512,8 @@ TEST(PostingListTest, RandomInsertAndDeletes) {
|
||||
pl.erase(rand() % 100000);
|
||||
}
|
||||
|
||||
ASSERT_GT(pl.size(), 750);
|
||||
ASSERT_LT(pl.size(), 1000);
|
||||
ASSERT_GT(pl.num_blocks(), 750);
|
||||
ASSERT_LT(pl.num_blocks(), 1000);
|
||||
}
|
||||
|
||||
TEST(PostingListTest, IntersectionBasics) {
|
||||
@ -590,12 +617,14 @@ TEST(PostingListTest, CompactPostingListUpsertAppends) {
|
||||
ASSERT_EQ(15, list->length);
|
||||
ASSERT_EQ(15, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
|
||||
// no-op since the container expects resizing to be done outside
|
||||
list->upsert(1003, {1, 2});
|
||||
ASSERT_EQ(15, list->length);
|
||||
ASSERT_EQ(15, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
|
||||
// now resize
|
||||
void* obj = SET_COMPACT_POSTING(list);
|
||||
@ -604,6 +633,7 @@ TEST(PostingListTest, CompactPostingListUpsertAppends) {
|
||||
|
||||
ASSERT_EQ(19, (COMPACT_POSTING_PTR(obj))->length);
|
||||
ASSERT_EQ(24, (COMPACT_POSTING_PTR(obj))->capacity);
|
||||
ASSERT_EQ(4, (COMPACT_POSTING_PTR(obj))->ids_length);
|
||||
|
||||
// insert enough docs to NOT exceed compact posting list threshold
|
||||
posting_t::upsert(obj, 1004, {1, 2, 3, 4, 5, 6, 7, 8});
|
||||
@ -616,14 +646,17 @@ TEST(PostingListTest, CompactPostingListUpsertAppends) {
|
||||
ASSERT_EQ(1007, COMPACT_POSTING_PTR(obj)->last_id());
|
||||
ASSERT_TRUE(IS_COMPACT_POSTING(obj));
|
||||
ASSERT_EQ(1007, COMPACT_POSTING_PTR(obj)->last_id());
|
||||
ASSERT_EQ(8, (COMPACT_POSTING_PTR(obj))->ids_length);
|
||||
|
||||
// next upsert will exceed threshold
|
||||
posting_t::upsert(obj, 1008, {1, 2, 3, 4, 5, 6, 7, 8});
|
||||
ASSERT_FALSE(IS_COMPACT_POSTING(obj));
|
||||
|
||||
ASSERT_EQ(1, ((posting_list_t*)(obj))->size());
|
||||
ASSERT_EQ(1, ((posting_list_t*)(obj))->num_blocks());
|
||||
ASSERT_EQ(9, ((posting_list_t*)(obj))->get_root()->size());
|
||||
ASSERT_EQ(1008, ((posting_list_t*)(obj))->get_root()->ids.last());
|
||||
ASSERT_EQ(9, ((posting_list_t*)(obj))->get_root()->ids.getLength());
|
||||
ASSERT_EQ(9, ((posting_list_t*)(obj))->num_ids());
|
||||
|
||||
delete ((posting_list_t*)(obj));
|
||||
}
|
||||
@ -637,6 +670,7 @@ TEST(PostingListTest, CompactPostingListUpserts) {
|
||||
ASSERT_EQ(15, list->length);
|
||||
ASSERT_EQ(15, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
|
||||
// insert before first ID
|
||||
|
||||
@ -645,12 +679,14 @@ TEST(PostingListTest, CompactPostingListUpserts) {
|
||||
ASSERT_EQ(1002, COMPACT_POSTING_PTR(obj)->last_id());
|
||||
ASSERT_EQ(19, COMPACT_POSTING_PTR(obj)->length);
|
||||
ASSERT_EQ(24, COMPACT_POSTING_PTR(obj)->capacity);
|
||||
ASSERT_EQ(4, COMPACT_POSTING_PTR(obj)->num_ids());
|
||||
|
||||
// insert in the middle
|
||||
posting_t::upsert(obj, 999, {1, 2});
|
||||
ASSERT_EQ(1002, COMPACT_POSTING_PTR(obj)->last_id());
|
||||
ASSERT_EQ(23, COMPACT_POSTING_PTR(obj)->length);
|
||||
ASSERT_EQ(24, COMPACT_POSTING_PTR(obj)->capacity);
|
||||
ASSERT_EQ(5, COMPACT_POSTING_PTR(obj)->num_ids());
|
||||
|
||||
uint32_t expected_id_offsets[] = {
|
||||
2, 1, 2, 2,
|
||||
@ -678,6 +714,7 @@ TEST(PostingListTest, CompactPostingListUpdateWithLessOffsets) {
|
||||
ASSERT_EQ(15, list->length);
|
||||
ASSERT_EQ(15, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
|
||||
// update middle
|
||||
|
||||
@ -685,6 +722,7 @@ TEST(PostingListTest, CompactPostingListUpdateWithLessOffsets) {
|
||||
ASSERT_EQ(14, list->length);
|
||||
ASSERT_EQ(15, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
uint32_t expected_id_offsets[] = {3, 0, 3, 4, 0, 2, 1, 2, 1000, 3, 0, 3, 4, 1002};
|
||||
for(size_t i = 0; i < list->length; i++) {
|
||||
ASSERT_EQ(expected_id_offsets[i], list->id_offsets[i]);
|
||||
@ -695,6 +733,7 @@ TEST(PostingListTest, CompactPostingListUpdateWithLessOffsets) {
|
||||
ASSERT_EQ(13, list->length);
|
||||
ASSERT_EQ(15, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
uint32_t expected_id_offsets2[] = {2, 2, 4, 0, 2, 1, 2, 1000, 3, 0, 3, 4, 1002};
|
||||
for(size_t i = 0; i < list->length; i++) {
|
||||
ASSERT_EQ(expected_id_offsets2[i], list->id_offsets[i]);
|
||||
@ -705,6 +744,7 @@ TEST(PostingListTest, CompactPostingListUpdateWithLessOffsets) {
|
||||
ASSERT_EQ(12, list->length);
|
||||
ASSERT_EQ(15, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
uint32_t expected_id_offsets3[] = {2, 2, 4, 0, 2, 1, 2, 1000, 2, 2, 4, 1002};
|
||||
for(size_t i = 0; i < list->length; i++) {
|
||||
ASSERT_EQ(expected_id_offsets3[i], list->id_offsets[i]);
|
||||
@ -722,6 +762,7 @@ TEST(PostingListTest, CompactPostingListUpdateWithMoreOffsets) {
|
||||
ASSERT_EQ(15, list->length);
|
||||
ASSERT_EQ(15, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
|
||||
// update middle
|
||||
void* obj = SET_COMPACT_POSTING(list);
|
||||
@ -730,6 +771,7 @@ TEST(PostingListTest, CompactPostingListUpdateWithMoreOffsets) {
|
||||
ASSERT_EQ(16, list->length);
|
||||
ASSERT_EQ(20, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
uint32_t expected_id_offsets[] = {3, 0, 3, 4, 0, 4, 1, 2, 3, 4, 1000, 3, 0, 3, 4, 1002};
|
||||
for(size_t i = 0; i < list->length; i++) {
|
||||
ASSERT_EQ(expected_id_offsets[i], list->id_offsets[i]);
|
||||
@ -740,6 +782,7 @@ TEST(PostingListTest, CompactPostingListUpdateWithMoreOffsets) {
|
||||
ASSERT_EQ(17, list->length);
|
||||
ASSERT_EQ(20, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
uint32_t expected_id_offsets2[] = {4, 1, 2, 3, 4, 0, 4, 1, 2, 3, 4, 1000, 3, 0, 3, 4, 1002};
|
||||
for(size_t i = 0; i < list->length; i++) {
|
||||
ASSERT_EQ(expected_id_offsets2[i], list->id_offsets[i]);
|
||||
@ -750,6 +793,7 @@ TEST(PostingListTest, CompactPostingListUpdateWithMoreOffsets) {
|
||||
ASSERT_EQ(18, list->length);
|
||||
ASSERT_EQ(20, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
uint32_t expected_id_offsets3[] = {4, 1, 2, 3, 4, 0, 4, 1, 2, 3, 4, 1000, 4, 1, 2, 3, 4, 1002};
|
||||
for(size_t i = 0; i < list->length; i++) {
|
||||
ASSERT_EQ(expected_id_offsets3[i], list->id_offsets[i]);
|
||||
@ -770,11 +814,13 @@ TEST(PostingListTest, CompactPostingListErase) {
|
||||
ASSERT_EQ(15, list->length);
|
||||
ASSERT_EQ(15, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
|
||||
list->erase(1000);
|
||||
ASSERT_EQ(10, list->length);
|
||||
ASSERT_EQ(15, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(2, list->num_ids());
|
||||
|
||||
// deleting using posting wrapper
|
||||
void* obj = SET_COMPACT_POSTING(list);
|
||||
@ -783,6 +829,7 @@ TEST(PostingListTest, CompactPostingListErase) {
|
||||
ASSERT_EQ(5, (COMPACT_POSTING_PTR(obj))->length);
|
||||
ASSERT_EQ(7, (COMPACT_POSTING_PTR(obj))->capacity);
|
||||
ASSERT_EQ(0, (COMPACT_POSTING_PTR(obj))->last_id());
|
||||
ASSERT_EQ(1, (COMPACT_POSTING_PTR(obj))->num_ids());
|
||||
|
||||
// upsert again
|
||||
posting_t::upsert(obj, 1002, {0, 3, 4});
|
||||
@ -790,6 +837,7 @@ TEST(PostingListTest, CompactPostingListErase) {
|
||||
ASSERT_EQ(10, list->length);
|
||||
ASSERT_EQ(13, list->capacity);
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(2, list->num_ids());
|
||||
|
||||
free(list);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user