refactor rev_iterator and add skip_to_rev()

This commit is contained in:
krunal 2023-10-06 20:44:33 +05:30
parent 0eeb583afd
commit 6b60b62afa
4 changed files with 51 additions and 35 deletions

View File

@ -628,7 +628,8 @@ public:
static float int64_t_to_float(int64_t n);
void get_distinct_id(const std::string& field_name, posting_list_t::iterator_t& facet_index_it,
const uint32_t seq_id, const bool group_missing_values, uint64_t& distinct_id) const;
const uint32_t seq_id, const bool group_missing_values, uint64_t& distinct_id,
bool is_reverse=false) const;
static void compute_token_offsets_facets(index_record& record,
const tsl::htrie_map<char, field>& search_schema,

View File

@ -91,7 +91,7 @@ public:
uint32_t* offsets = nullptr;
explicit iterator_t(const std::map<last_id_t, block_t*>* id_block_map,
block_t* start, block_t* end, bool auto_destroy = true, uint32_t field_id = 0);
block_t* start, block_t* end, bool auto_destroy = true, uint32_t field_id = 0, bool reverse = false);
~iterator_t();
iterator_t(iterator_t&& rhs) noexcept;
@ -100,8 +100,8 @@ public:
void reset_cache();
[[nodiscard]] bool valid() const;
void next();
void previous();
void skip_to(uint32_t id);
void skip_to_rev(uint32_t id);
void set_index(uint32_t index);
[[nodiscard]] uint32_t id() const;
[[nodiscard]] uint32_t last_block_id() const;

View File

@ -2403,8 +2403,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
if (group_limit != 0) {
distinct_id = 1;
for(auto& kv : group_by_field_it_vec) {
get_distinct_id(kv.field_name, kv.it, seq_id, group_missing_values, distinct_id);
kv.it.previous();
get_distinct_id(kv.field_name, kv.it, seq_id, group_missing_values, distinct_id, true);
}
if(excluded_group_ids.count(distinct_id) != 0) {
continue;
@ -6027,7 +6026,8 @@ void Index::score_results(const std::vector<sort_by> & sort_fields, const uint16
}
void Index::get_distinct_id(const std::string& field_name, posting_list_t::iterator_t& facet_index_it,
const uint32_t seq_id, const bool group_missing_values, uint64_t& distinct_id) const {
const uint32_t seq_id, const bool group_missing_values, uint64_t& distinct_id,
bool is_reverse) const {
if (!facet_index_it.valid()) {
if (!group_missing_values) {
distinct_id = seq_id;
@ -6036,7 +6036,11 @@ void Index::get_distinct_id(const std::string& field_name, posting_list_t::itera
}
// calculate hash from group_by_fields
std::vector<uint32_t> facet_hashes;
facet_index_it.skip_to(seq_id);
if(!is_reverse) {
facet_index_it.skip_to(seq_id);
} else {
facet_index_it.skip_to_rev(seq_id);
}
if (facet_index_it.valid() && facet_index_it.id() == seq_id) {
posting_list_t::get_offsets(facet_index_it, facet_hashes);

View File

@ -999,7 +999,7 @@ posting_list_t::iterator_t posting_list_t::new_rev_iterator() {
start_block = id_block_map.rbegin()->second;
}
auto rev_it = posting_list_t::iterator_t(&id_block_map, start_block, nullptr, true);
auto rev_it = posting_list_t::iterator_t(&id_block_map, start_block, nullptr, true, 0, true);
return rev_it;
}
@ -1652,7 +1652,7 @@ size_t posting_list_t::get_last_offset(const posting_list_t::iterator_t& it, boo
posting_list_t::iterator_t::iterator_t(const std::map<last_id_t, block_t*>* id_block_map,
posting_list_t::block_t* start, posting_list_t::block_t* end,
bool auto_destroy, uint32_t field_id):
bool auto_destroy, uint32_t field_id, bool reverse):
id_block_map(id_block_map), curr_block(start), curr_index(0), end_block(end),
auto_destroy(auto_destroy), field_id(field_id) {
@ -1661,6 +1661,10 @@ posting_list_t::iterator_t::iterator_t(const std::map<last_id_t, block_t*>* id_b
offset_index = curr_block->offset_index.uncompress();
offsets = curr_block->offsets.uncompress();
}
if(reverse) {
curr_index = curr_block->ids.getLength()-1;
}
}
bool posting_list_t::iterator_t::valid() const {
@ -1687,32 +1691,6 @@ void posting_list_t::iterator_t::next() {
}
}
/**
 * Moves the iterator one position backwards.
 *
 * While the current index is not the first slot of the block, only the index is
 * decremented. When the iterator already sits on the first slot, the preceding
 * block is looked up through `id_block_map` and the iterator is repositioned on
 * that block's last slot, re-uncompressing the cached arrays. If there is no
 * preceding block (or the current block cannot be found in the map), `curr_block`
 * is set to `end_block`.
 */
void posting_list_t::iterator_t::previous() {
    // Test for the block boundary *before* decrementing: with an unsigned index,
    // decrementing 0 wraps around and a subsequent `< 0` check can never fire.
    if(curr_index > 0) {
        curr_index--;
        return;
    }

    // since block stores only the next pointer, we have to use `id_block_map` for reverse iteration
    // NOTE(review): assumes the current block is non-empty and keyed in the map by its last id -- confirm.
    auto last_ele = ids[curr_block->size()-1];
    auto it = id_block_map->find(last_ele);

    if(it == id_block_map->end() || it == id_block_map->begin()) {
        // no earlier block to step into
        curr_block = end_block;
        return;
    }

    --it;
    curr_block = it->second;
    curr_index = curr_block->size()-1;

    // release the arrays cached for the block we are leaving before caching the new block
    delete [] ids;
    delete [] offset_index;
    delete [] offsets;

    ids = curr_block->ids.uncompress();
    offset_index = curr_block->offset_index.uncompress();
    offsets = curr_block->offsets.uncompress();
}
uint32_t posting_list_t::iterator_t::last_block_id() const {
auto size = curr_block->size();
if(size == 0) {
@ -1767,6 +1745,39 @@ void posting_list_t::iterator_t::skip_to(uint32_t id) {
}
}
/**
 * Backward-moving skip: positions the iterator for a lookup of `id`.
 *
 * If `id` is at or beyond `last_block_id()` of the current block, the iterator stays
 * in the current block and only walks the index downwards. Otherwise the cached
 * arrays are reset, the target block is located with `id_block_map->lower_bound(id)`,
 * and the index is walked down from that block's last slot while the id under the
 * cursor is greater than `id`.
 *
 * The downward walk stops at index 0, so the iterator can end up on an id that is
 * still greater than `id`; callers must compare `this->id()` with the requested id.
 *
 * @param id the id to skip back to.
 */
void posting_list_t::iterator_t::skip_to_rev(uint32_t id) {
    // first look to skip within current block
    // NOTE(review): if last_block_id() is the largest id of the current block, this branch is
    // only taken when `id` is not below any id in the block, so the loop would never step --
    // confirm whether the intended comparison is against the block's first id.
    if(id >= this->last_block_id()) {
        while(curr_index > 0 && this->id() > id) {
            curr_index--;
        }

        return ;
    }

    // identify the block where the id could exist and skip to that
    reset_cache();

    const auto it = id_block_map->lower_bound(id);
    if(it == id_block_map->end()) {
        return;
    }

    curr_block = it->second;
    curr_index = curr_block->size()-1;

    if(curr_block->size() == 0) {
        // Empty block: `size()-1` has wrapped, so there is no slot to read. Bail out before
        // the scan below dereferences the cached arrays with an out-of-range index.
        reset_cache();
        return;
    }

    ids = curr_block->ids.uncompress();
    offset_index = curr_block->offset_index.uncompress();
    offsets = curr_block->offsets.uncompress();

    // walk down from the last slot; the `curr_index > 0` guard keeps the index inside the block
    while(curr_index > 0 && this->id() > id) {
        curr_index--;
    }
}
posting_list_t::iterator_t::~iterator_t() {
if(auto_destroy) {
reset_cache();