mirror of
https://github.com/typesense/typesense.git
synced 2025-05-21 22:33:27 +08:00
refactor rev_iterator and add skip_to_rev()
This commit is contained in:
parent
0eeb583afd
commit
6b60b62afa
@ -628,7 +628,8 @@ public:
|
||||
static float int64_t_to_float(int64_t n);
|
||||
|
||||
void get_distinct_id(const std::string& field_name, posting_list_t::iterator_t& facet_index_it,
|
||||
const uint32_t seq_id, const bool group_missing_values, uint64_t& distinct_id) const;
|
||||
const uint32_t seq_id, const bool group_missing_values, uint64_t& distinct_id,
|
||||
bool is_reverse=false) const;
|
||||
|
||||
static void compute_token_offsets_facets(index_record& record,
|
||||
const tsl::htrie_map<char, field>& search_schema,
|
||||
|
@ -91,7 +91,7 @@ public:
|
||||
uint32_t* offsets = nullptr;
|
||||
|
||||
explicit iterator_t(const std::map<last_id_t, block_t*>* id_block_map,
|
||||
block_t* start, block_t* end, bool auto_destroy = true, uint32_t field_id = 0);
|
||||
block_t* start, block_t* end, bool auto_destroy = true, uint32_t field_id = 0, bool reverse = false);
|
||||
~iterator_t();
|
||||
|
||||
iterator_t(iterator_t&& rhs) noexcept;
|
||||
@ -100,8 +100,8 @@ public:
|
||||
void reset_cache();
|
||||
[[nodiscard]] bool valid() const;
|
||||
void next();
|
||||
void previous();
|
||||
void skip_to(uint32_t id);
|
||||
void skip_to_rev(uint32_t id);
|
||||
void set_index(uint32_t index);
|
||||
[[nodiscard]] uint32_t id() const;
|
||||
[[nodiscard]] uint32_t last_block_id() const;
|
||||
|
@ -2403,8 +2403,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
if (group_limit != 0) {
|
||||
distinct_id = 1;
|
||||
for(auto& kv : group_by_field_it_vec) {
|
||||
get_distinct_id(kv.field_name, kv.it, seq_id, group_missing_values, distinct_id);
|
||||
kv.it.previous();
|
||||
get_distinct_id(kv.field_name, kv.it, seq_id, group_missing_values, distinct_id, true);
|
||||
}
|
||||
if(excluded_group_ids.count(distinct_id) != 0) {
|
||||
continue;
|
||||
@ -6027,7 +6026,8 @@ void Index::score_results(const std::vector<sort_by> & sort_fields, const uint16
|
||||
}
|
||||
|
||||
void Index::get_distinct_id(const std::string& field_name, posting_list_t::iterator_t& facet_index_it,
|
||||
const uint32_t seq_id, const bool group_missing_values, uint64_t& distinct_id) const {
|
||||
const uint32_t seq_id, const bool group_missing_values, uint64_t& distinct_id,
|
||||
bool is_reverse) const {
|
||||
if (!facet_index_it.valid()) {
|
||||
if (!group_missing_values) {
|
||||
distinct_id = seq_id;
|
||||
@ -6036,7 +6036,11 @@ void Index::get_distinct_id(const std::string& field_name, posting_list_t::itera
|
||||
}
|
||||
// calculate hash from group_by_fields
|
||||
std::vector<uint32_t> facet_hashes;
|
||||
facet_index_it.skip_to(seq_id);
|
||||
if(!is_reverse) {
|
||||
facet_index_it.skip_to(seq_id);
|
||||
} else {
|
||||
facet_index_it.skip_to_rev(seq_id);
|
||||
}
|
||||
|
||||
if (facet_index_it.valid() && facet_index_it.id() == seq_id) {
|
||||
posting_list_t::get_offsets(facet_index_it, facet_hashes);
|
||||
|
@ -999,7 +999,7 @@ posting_list_t::iterator_t posting_list_t::new_rev_iterator() {
|
||||
start_block = id_block_map.rbegin()->second;
|
||||
}
|
||||
|
||||
auto rev_it = posting_list_t::iterator_t(&id_block_map, start_block, nullptr, true);
|
||||
auto rev_it = posting_list_t::iterator_t(&id_block_map, start_block, nullptr, true, 0, true);
|
||||
return rev_it;
|
||||
}
|
||||
|
||||
@ -1652,7 +1652,7 @@ size_t posting_list_t::get_last_offset(const posting_list_t::iterator_t& it, boo
|
||||
|
||||
posting_list_t::iterator_t::iterator_t(const std::map<last_id_t, block_t*>* id_block_map,
|
||||
posting_list_t::block_t* start, posting_list_t::block_t* end,
|
||||
bool auto_destroy, uint32_t field_id):
|
||||
bool auto_destroy, uint32_t field_id, bool reverse):
|
||||
id_block_map(id_block_map), curr_block(start), curr_index(0), end_block(end),
|
||||
auto_destroy(auto_destroy), field_id(field_id) {
|
||||
|
||||
@ -1661,6 +1661,10 @@ posting_list_t::iterator_t::iterator_t(const std::map<last_id_t, block_t*>* id_b
|
||||
offset_index = curr_block->offset_index.uncompress();
|
||||
offsets = curr_block->offsets.uncompress();
|
||||
}
|
||||
|
||||
if(reverse) {
|
||||
curr_index = curr_block->ids.getLength()-1;
|
||||
}
|
||||
}
|
||||
|
||||
bool posting_list_t::iterator_t::valid() const {
|
||||
@ -1687,32 +1691,6 @@ void posting_list_t::iterator_t::next() {
|
||||
}
|
||||
}
|
||||
|
||||
void posting_list_t::iterator_t::previous() {
|
||||
curr_index--;
|
||||
if(curr_index < 0) {
|
||||
// since block stores only the next pointer, we have to use `id_block_map` for reverse iteration
|
||||
auto last_ele = ids[curr_block->size()-1];
|
||||
auto it = id_block_map->find(last_ele);
|
||||
if(it != id_block_map->end() && it != id_block_map->begin()) {
|
||||
it--;
|
||||
curr_block = it->second;
|
||||
curr_index = curr_block->size()-1;
|
||||
|
||||
delete [] ids;
|
||||
delete [] offset_index;
|
||||
delete [] offsets;
|
||||
|
||||
ids = offset_index = offsets = nullptr;
|
||||
|
||||
ids = curr_block->ids.uncompress();
|
||||
offset_index = curr_block->offset_index.uncompress();
|
||||
offsets = curr_block->offsets.uncompress();
|
||||
} else {
|
||||
curr_block = end_block;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t posting_list_t::iterator_t::last_block_id() const {
|
||||
auto size = curr_block->size();
|
||||
if(size == 0) {
|
||||
@ -1767,6 +1745,39 @@ void posting_list_t::iterator_t::skip_to(uint32_t id) {
|
||||
}
|
||||
}
|
||||
|
||||
void posting_list_t::iterator_t::skip_to_rev(uint32_t id) {
|
||||
// first look to skip within current block
|
||||
if(id >= this->last_block_id()) {
|
||||
while(curr_index > 0 && this->id() > id) {
|
||||
curr_index--;
|
||||
}
|
||||
|
||||
return ;
|
||||
}
|
||||
|
||||
// identify the block where the id could exist and skip to that
|
||||
reset_cache();
|
||||
|
||||
const auto it = id_block_map->lower_bound(id);
|
||||
if(it == id_block_map->end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
curr_block = it->second;
|
||||
curr_index = curr_block->size()-1;
|
||||
ids = curr_block->ids.uncompress();
|
||||
offset_index = curr_block->offset_index.uncompress();
|
||||
offsets = curr_block->offsets.uncompress();
|
||||
|
||||
while(curr_index > 0 && this->id() > id) {
|
||||
curr_index--;
|
||||
}
|
||||
|
||||
if(curr_index == UINT32_MAX) {
|
||||
reset_cache();
|
||||
}
|
||||
}
|
||||
|
||||
posting_list_t::iterator_t::~iterator_t() {
|
||||
if(auto_destroy) {
|
||||
reset_cache();
|
||||
|
Loading…
x
Reference in New Issue
Block a user