diff --git a/include/index.h b/include/index.h index 768643e1..79c49c8b 100644 --- a/include/index.h +++ b/include/index.h @@ -628,7 +628,8 @@ public: static float int64_t_to_float(int64_t n); void get_distinct_id(const std::string& field_name, posting_list_t::iterator_t& facet_index_it, - const uint32_t seq_id, const bool group_missing_values, uint64_t& distinct_id) const; + const uint32_t seq_id, const bool group_missing_values, uint64_t& distinct_id, + bool is_reverse=false) const; static void compute_token_offsets_facets(index_record& record, const tsl::htrie_map& search_schema, diff --git a/include/posting_list.h b/include/posting_list.h index 388991a1..6da4345d 100644 --- a/include/posting_list.h +++ b/include/posting_list.h @@ -91,7 +91,7 @@ public: uint32_t* offsets = nullptr; explicit iterator_t(const std::map* id_block_map, - block_t* start, block_t* end, bool auto_destroy = true, uint32_t field_id = 0); + block_t* start, block_t* end, bool auto_destroy = true, uint32_t field_id = 0, bool reverse = false); ~iterator_t(); iterator_t(iterator_t&& rhs) noexcept; @@ -100,8 +100,8 @@ public: void reset_cache(); [[nodiscard]] bool valid() const; void next(); - void previous(); void skip_to(uint32_t id); + void skip_to_rev(uint32_t id); void set_index(uint32_t index); [[nodiscard]] uint32_t id() const; [[nodiscard]] uint32_t last_block_id() const; diff --git a/src/index.cpp b/src/index.cpp index 799bec23..871361fe 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -2403,8 +2403,7 @@ Option Index::search(std::vector& field_query_tokens, cons if (group_limit != 0) { distinct_id = 1; for(auto& kv : group_by_field_it_vec) { - get_distinct_id(kv.field_name, kv.it, seq_id, group_missing_values, distinct_id); - kv.it.previous(); + get_distinct_id(kv.field_name, kv.it, seq_id, group_missing_values, distinct_id, true); } if(excluded_group_ids.count(distinct_id) != 0) { continue; @@ -6027,7 +6026,8 @@ void Index::score_results(const std::vector & sort_fields, const uint16 } void Index::get_distinct_id(const std::string& field_name, posting_list_t::iterator_t& facet_index_it, - const uint32_t seq_id, const bool group_missing_values, uint64_t& distinct_id) const { + const uint32_t seq_id, const bool group_missing_values, uint64_t& distinct_id, + bool is_reverse) const { if (!facet_index_it.valid()) { if (!group_missing_values) { distinct_id = seq_id; @@ -6036,7 +6036,11 @@ void Index::get_distinct_id(const std::string& field_name, posting_list_t::itera } // calculate hash from group_by_fields std::vector facet_hashes; - facet_index_it.skip_to(seq_id); + if(!is_reverse) { + facet_index_it.skip_to(seq_id); + } else { + facet_index_it.skip_to_rev(seq_id); + } if (facet_index_it.valid() && facet_index_it.id() == seq_id) { posting_list_t::get_offsets(facet_index_it, facet_hashes); diff --git a/src/posting_list.cpp b/src/posting_list.cpp index f722b6ea..038435ab 100644 --- a/src/posting_list.cpp +++ b/src/posting_list.cpp @@ -999,7 +999,7 @@ posting_list_t::iterator_t posting_list_t::new_rev_iterator() { start_block = id_block_map.rbegin()->second; } - auto rev_it = posting_list_t::iterator_t(&id_block_map, start_block, nullptr, true); + auto rev_it = posting_list_t::iterator_t(&id_block_map, start_block, nullptr, true, 0, true); return rev_it; } @@ -1652,7 +1652,7 @@ size_t posting_list_t::get_last_offset(const posting_list_t::iterator_t& it, boo posting_list_t::iterator_t::iterator_t(const std::map* id_block_map, posting_list_t::block_t* start, posting_list_t::block_t* end, - bool auto_destroy, uint32_t field_id): + bool auto_destroy, uint32_t field_id, bool reverse): id_block_map(id_block_map), curr_block(start), curr_index(0), end_block(end), auto_destroy(auto_destroy), field_id(field_id) { @@ -1661,6 +1661,10 @@ posting_list_t::iterator_t::iterator_t(const std::map* id_b offset_index = curr_block->offset_index.uncompress(); offsets = curr_block->offsets.uncompress(); } + + if(reverse) { + curr_index = curr_block->ids.getLength()-1; + } } bool posting_list_t::iterator_t::valid() const { @@ -1687,32 +1691,6 @@ void posting_list_t::iterator_t::next() { } } -void posting_list_t::iterator_t::previous() { - curr_index--; - if(curr_index < 0) { - // since block stores only the next pointer, we have to use `id_block_map` for reverse iteration - auto last_ele = ids[curr_block->size()-1]; - auto it = id_block_map->find(last_ele); - if(it != id_block_map->end() && it != id_block_map->begin()) { - it--; - curr_block = it->second; - curr_index = curr_block->size()-1; - - delete [] ids; - delete [] offset_index; - delete [] offsets; - - ids = offset_index = offsets = nullptr; - - ids = curr_block->ids.uncompress(); - offset_index = curr_block->offset_index.uncompress(); - offsets = curr_block->offsets.uncompress(); - } else { - curr_block = end_block; - } - } -} - uint32_t posting_list_t::iterator_t::last_block_id() const { auto size = curr_block->size(); if(size == 0) { @@ -1767,6 +1745,39 @@ void posting_list_t::iterator_t::skip_to(uint32_t id) { } } +void posting_list_t::iterator_t::skip_to_rev(uint32_t id) { + // first look to skip within current block + if(id >= this->last_block_id()) { + while(curr_index > 0 && this->id() > id) { + curr_index--; + } + + return ; + } + + // identify the block where the id could exist and skip to that + reset_cache(); + + const auto it = id_block_map->lower_bound(id); + if(it == id_block_map->end()) { + return; + } + + curr_block = it->second; + curr_index = curr_block->size()-1; + ids = curr_block->ids.uncompress(); + offset_index = curr_block->offset_index.uncompress(); + offsets = curr_block->offsets.uncompress(); + + while(curr_index > 0 && this->id() > id) { + curr_index--; + } + + if(curr_index == UINT32_MAX) { + reset_cache(); + } +} + posting_list_t::iterator_t::~iterator_t() { if(auto_destroy) { reset_cache();