mirror of
https://github.com/typesense/typesense.git
synced 2025-05-19 21:22:25 +08:00
Add contains for posting list.
This commit is contained in:
parent
70f970b80c
commit
ef1badb077
@ -23,6 +23,8 @@ struct compact_posting_list_t {
|
||||
|
||||
posting_list_t* to_full_posting_list();
|
||||
|
||||
bool contains(uint32_t id);
|
||||
|
||||
int64_t upsert(uint32_t id, const std::vector<uint32_t>& offsets);
|
||||
int64_t upsert(uint32_t id, const uint32_t* offsets, uint32_t num_offsets);
|
||||
|
||||
@ -51,7 +53,16 @@ public:
|
||||
|
||||
static uint32_t first_id(const void* obj);
|
||||
|
||||
static bool contains(const void* obj, uint32_t id);
|
||||
|
||||
static void merge(const std::vector<void*>& posting_lists, std::vector<uint32_t>& result_ids);
|
||||
|
||||
static void intersect(const std::vector<void*>& posting_lists, std::vector<uint32_t>& result_ids);
|
||||
|
||||
static bool block_intersect(
|
||||
const std::vector<void*>& posting_lists,
|
||||
size_t batch_size,
|
||||
std::vector<posting_list_t::iterator_t>& its,
|
||||
posting_list_t::result_iter_state_t& iter_state
|
||||
);
|
||||
};
|
@ -29,7 +29,7 @@ public:
|
||||
// link to next block
|
||||
block_t* next = nullptr;
|
||||
|
||||
void insert_and_shift_offset_index(uint32_t index, uint32_t num_offsets);
|
||||
bool contains(uint32_t id);
|
||||
|
||||
void remove_and_shift_offset_index(const uint32_t* indices_sorted, uint32_t num_indices);
|
||||
|
||||
@ -123,6 +123,8 @@ public:
|
||||
|
||||
block_t* block_of(last_id_t id);
|
||||
|
||||
bool contains(uint32_t id);
|
||||
|
||||
iterator_t new_iterator();
|
||||
|
||||
static void merge(const std::vector<posting_list_t*>& posting_lists, std::vector<uint32_t>& result_ids);
|
||||
|
@ -192,6 +192,27 @@ uint32_t compact_posting_list_t::first_id() {
|
||||
return id_offsets[id_offsets[0] + 1];
|
||||
}
|
||||
|
||||
bool compact_posting_list_t::contains(uint32_t id) {
|
||||
size_t i = 0;
|
||||
while(i < length) {
|
||||
size_t num_existing_offsets = id_offsets[i];
|
||||
size_t existing_id = id_offsets[i + num_existing_offsets + 1];
|
||||
|
||||
if(existing_id > id) {
|
||||
// not found!
|
||||
return false;
|
||||
}
|
||||
|
||||
if(existing_id == id) {
|
||||
return true;
|
||||
}
|
||||
|
||||
i += num_existing_offsets + 2;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* posting operations */
|
||||
|
||||
void posting_t::upsert(void*& obj, uint32_t id, const std::vector<uint32_t>& offsets) {
|
||||
@ -293,6 +314,16 @@ uint32_t posting_t::first_id(const void* obj) {
|
||||
}
|
||||
}
|
||||
|
||||
bool posting_t::contains(const void* obj, uint32_t id) {
|
||||
if(IS_COMPACT_POSTING(obj)) {
|
||||
compact_posting_list_t* list = COMPACT_POSTING_PTR(obj);
|
||||
return list->contains(id);
|
||||
} else {
|
||||
posting_list_t* list = (posting_list_t*) RAW_POSTING_PTR(obj);
|
||||
return list->contains(id);
|
||||
}
|
||||
}
|
||||
|
||||
void posting_t::merge(const std::vector<void*>& raw_posting_lists, std::vector<uint32_t>& result_ids) {
|
||||
// we will have to convert the compact posting list (if any) to full form
|
||||
std::vector<posting_list_t*> plists;
|
||||
@ -334,3 +365,22 @@ void posting_t::to_expanded_plists(const std::vector<void*>& raw_posting_lists,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool posting_t::block_intersect(const std::vector<void*>& raw_posting_lists, size_t batch_size,
|
||||
std::vector<posting_list_t::iterator_t>& its,
|
||||
posting_list_t::result_iter_state_t& iter_state) {
|
||||
// we will have to convert the compact posting list (if any) to full form
|
||||
std::vector<posting_list_t*> plists;
|
||||
std::vector<uint32_t> expanded_plist_indices;
|
||||
to_expanded_plists(raw_posting_lists, plists, expanded_plist_indices);
|
||||
|
||||
bool done = posting_list_t::block_intersect(plists, batch_size, its, iter_state);
|
||||
|
||||
if(done) {
|
||||
for(uint32_t expanded_plist_index: expanded_plist_indices) {
|
||||
delete plists[expanded_plist_index];
|
||||
}
|
||||
}
|
||||
|
||||
return done;
|
||||
}
|
||||
|
@ -4,26 +4,6 @@
|
||||
|
||||
/* block_t operations */
|
||||
|
||||
// Opens a new slot at `index` in `offset_index` and pushes every later entry
// forward by `num_offsets`.
//
// The entries from `index` onwards are moved one slot to the right, slot
// `index` keeps the value that was previously stored there, and each entry
// after it is increased by `num_offsets`. The index grows by one element.
void posting_list_t::block_t::insert_and_shift_offset_index(const uint32_t index, const uint32_t num_offsets) {
    const uint32_t start_offset = offset_index.at(index);
    const uint32_t old_len = offset_index.getLength();
    const uint32_t grown_len = old_len + 1;

    // working copy with room for one extra element; released with delete[] below
    uint32_t* entries = offset_index.uncompress(grown_len);

    // shift [index, old_len) one slot to the right
    memmove(entries + index + 1, entries + index, sizeof(uint32_t) * (old_len - index));
    entries[index] = start_offset;

    // everything after the new slot now begins `num_offsets` later
    for(uint32_t pos = index + 1; pos < grown_len; pos++) {
        entries[pos] += num_offsets;
    }

    offset_index.load(entries, grown_len);

    delete [] entries;
}
|
||||
|
||||
uint32_t posting_list_t::block_t::upsert(const uint32_t id, const std::vector<uint32_t>& positions) {
|
||||
if(id <= ids.last()) {
|
||||
// we have to check if `id` already exists, for an opportunity to do in-place updates
|
||||
@ -198,6 +178,10 @@ void posting_list_t::block_t::remove_and_shift_offset_index(const uint32_t* indi
|
||||
delete[] new_array;
|
||||
}
|
||||
|
||||
bool posting_list_t::block_t::contains(uint32_t id) {
|
||||
return ids.contains(id);
|
||||
}
|
||||
|
||||
/* posting_list_t operations */
|
||||
|
||||
posting_list_t::posting_list_t(uint16_t max_block_elements): BLOCK_MAX_ELEMENTS(max_block_elements) {
|
||||
@ -891,6 +875,17 @@ size_t posting_list_t::num_ids() {
|
||||
return ids_length;
|
||||
}
|
||||
|
||||
bool posting_list_t::contains(uint32_t id) {
|
||||
const auto it = id_block_map.lower_bound(id);
|
||||
|
||||
if(it == id_block_map.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
block_t* potential_block = it->second;
|
||||
return potential_block->contains(id);
|
||||
}
|
||||
|
||||
/* iterator_t operations */
|
||||
|
||||
posting_list_t::iterator_t::iterator_t(posting_list_t::block_t* root):
|
||||
|
@ -281,6 +281,7 @@ TEST(PostingListTest, RemovalsOnFirstBlock) {
|
||||
|
||||
// try to erase when posting list is empty
|
||||
pl.erase(0);
|
||||
ASSERT_FALSE(pl.contains(0));
|
||||
|
||||
ASSERT_EQ(0, pl.num_ids());
|
||||
ASSERT_EQ(0, pl.num_blocks());
|
||||
@ -305,6 +306,11 @@ TEST(PostingListTest, RemovalsOnFirstBlock) {
|
||||
ASSERT_EQ(2, pl.num_blocks());
|
||||
ASSERT_EQ(6, pl.num_ids());
|
||||
|
||||
ASSERT_TRUE(pl.contains(2));
|
||||
ASSERT_TRUE(pl.contains(5));
|
||||
ASSERT_FALSE(pl.contains(6));
|
||||
ASSERT_FALSE(pl.contains(1000));
|
||||
|
||||
// delete non-existing element
|
||||
pl.erase(1000);
|
||||
ASSERT_EQ(6, pl.num_ids());
|
||||
@ -743,6 +749,12 @@ TEST(PostingListTest, CompactPostingListUpsertAppends) {
|
||||
ASSERT_EQ(1002, list->last_id());
|
||||
ASSERT_EQ(3, list->num_ids());
|
||||
|
||||
ASSERT_TRUE(list->contains(0));
|
||||
ASSERT_TRUE(list->contains(1000));
|
||||
ASSERT_TRUE(list->contains(1002));
|
||||
ASSERT_FALSE(list->contains(500));
|
||||
ASSERT_FALSE(list->contains(2));
|
||||
|
||||
// no-op since the container expects resizing to be done outside
|
||||
list->upsert(1003, {1, 2});
|
||||
ASSERT_EQ(15, list->length);
|
||||
|
Loading…
x
Reference in New Issue
Block a user