From 0114eb9b174f3da15ecd4447590145f547fbdd95 Mon Sep 17 00:00:00 2001
From: Harpreet Sangar
Date: Mon, 17 Apr 2023 20:54:50 +0530
Subject: [PATCH] Add numeric field support in `filter_result_t`.

---
 include/num_tree.h             |  10 ++++
 src/filter_result_iterator.cpp |  29 +++++++++-
 src/num_tree.cpp               | 103 +++++++++++++++++++++++++++++++++
 3 files changed, 140 insertions(+), 2 deletions(-)

diff --git a/include/num_tree.h b/include/num_tree.h
index 2170a30e..444f6266 100644
--- a/include/num_tree.h
+++ b/include/num_tree.h
@@ -30,6 +30,11 @@ public:
 
     void range_inclusive_search(int64_t start, int64_t end, uint32_t** ids, size_t& ids_len);
 
+    void range_inclusive_search_iterators(int64_t start,
+                                          int64_t end,
+                                          std::vector& id_list_iterators,
+                                          std::vector& expanded_id_lists);
+
     void approx_range_inclusive_search_count(int64_t start, int64_t end, uint32_t& ids_len);
 
     void range_inclusive_contains(const int64_t& start, const int64_t& end,
@@ -42,6 +47,11 @@ public:
 
     void search(NUM_COMPARATOR comparator, int64_t value, uint32_t** ids, size_t& ids_len);
 
+    void search_iterators(NUM_COMPARATOR comparator,
+                          int64_t value,
+                          std::vector& id_list_iterators,
+                          std::vector& expanded_id_lists);
+
     void approx_search_count(NUM_COMPARATOR comparator, int64_t value, uint32_t& ids_len);
 
     void remove(uint64_t value, uint32_t id);
diff --git a/src/filter_result_iterator.cpp b/src/filter_result_iterator.cpp
index a6b61134..ff9cab1a 100644
--- a/src/filter_result_iterator.cpp
+++ b/src/filter_result_iterator.cpp
@@ -447,7 +447,17 @@ void filter_result_iterator_t::next() {
 
     field f = index->search_schema.at(a_filter.field_name);
 
-    if (f.is_string()) {
+    if (f.is_integer() || f.is_float() || f.is_bool()) {
+        // Numeric/bool matches are read from filter_result.docs; advance the cursor by one.
+        result_index++;
+        if (result_index >= filter_result.count) {
+            is_valid = false;
+            return;
+        }
+
+        seq_id = filter_result.docs[result_index];
+        return;
+    } else if (f.is_string()) {
         if (filter_node->filter_exp.apply_not_equals) {
             if (++seq_id < result_index) {
                 return;
@@ -987,7 +997,22 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
 
     field f = index->search_schema.at(a_filter.field_name);
 
-    if (f.is_string()) {
+    if (f.is_integer() || f.is_float() || f.is_bool()) {
+        // Advance the cursor to the first entry of filter_result.docs that is >= id.
+        while(result_index < filter_result.count && filter_result.docs[result_index] < id) {
+            result_index++;
+        }
+
+        if (result_index >= filter_result.count) {
+            is_valid = false;
+            return;
+        }
+
+        // Publish the entry the cursor now rests on, as next() does; without this
+        // seq_id would still hold the id from before the skip.
+        seq_id = filter_result.docs[result_index];
+        return;
+    } else if (f.is_string()) {
         if (filter_node->filter_exp.apply_not_equals) {
             if (id < seq_id) {
                 return;
diff --git a/src/num_tree.cpp b/src/num_tree.cpp
index c59cb008..89c5e3a0 100644
--- a/src/num_tree.cpp
+++ b/src/num_tree.cpp
@@ -43,6 +43,41 @@ void num_tree_t::range_inclusive_search(int64_t start, int64_t end, uint32_t** i
     *ids = out;
 }
 
+/**
+ * Gathers the id lists of all keys in [start, end] (both bounds inclusive), passes them
+ * through ids_t::to_expanded_id_lists() and appends a new iterator for each resulting list.
+ * Nothing is appended when the tree is empty.
+ *
+ * @param start lowest key to include.
+ * @param end highest key to include.
+ * @param id_list_iterators output; iterators are appended, existing entries are kept.
+ * @param expanded_id_lists forwarded to ids_t::to_expanded_id_lists(); presumably receives
+ *                          lists created by that call which the caller must release -- TODO confirm.
+ */
+void num_tree_t::range_inclusive_search_iterators(int64_t start,
+                                                  int64_t end,
+                                                  std::vector& id_list_iterators,
+                                                  std::vector& expanded_id_lists) {
+    if (int64map.empty()) {
+        return;
+    }
+
+    auto it_start = int64map.lower_bound(start); // iter values will be >= start
+
+    std::vector raw_id_lists;
+    while (it_start != int64map.end() && it_start->first <= end) {
+        raw_id_lists.push_back(it_start->second);
+        it_start++;
+    }
+
+    std::vector id_lists;
+    ids_t::to_expanded_id_lists(raw_id_lists, id_lists, expanded_id_lists);
+
+    for (const auto &id_list: id_lists) {
+        id_list_iterators.emplace_back(id_list->new_iterator());
+    }
+}
+
 void num_tree_t::approx_range_inclusive_search_count(int64_t start, int64_t end, uint32_t& ids_len) {
     if (int64map.empty()) {
         return;
@@ -187,6 +222,74 @@ void num_tree_t::search(NUM_COMPARATOR comparator, int64_t value, uint32_t** ids
     }
 }
 
+/**
+ * Comparator counterpart of range_inclusive_search_iterators(): gathers the id lists of all
+ * keys that satisfy `key <comparator> value` and appends a new iterator for each list
+ * returned by ids_t::to_expanded_id_lists().
+ *
+ * Only EQUALS, GREATER_THAN, GREATER_THAN_EQUALS, LESS_THAN and LESS_THAN_EQUALS select keys;
+ * any other comparator selects none.
+ *
+ * @param comparator relation a key must have to `value`.
+ * @param value key to compare against.
+ * @param id_list_iterators output; iterators are appended, existing entries are kept.
+ * @param expanded_id_lists forwarded to ids_t::to_expanded_id_lists(); presumably receives
+ *                          lists created by that call which the caller must release -- TODO confirm.
+ */
+void num_tree_t::search_iterators(NUM_COMPARATOR comparator,
+                                  int64_t value,
+                                  std::vector& id_list_iterators,
+                                  std::vector& expanded_id_lists) {
+    if (int64map.empty()) {
+        return ;
+    }
+
+    std::vector raw_id_lists;
+    if (comparator == EQUALS) {
+        const auto& it = int64map.find(value);
+        if (it != int64map.end()) {
+            raw_id_lists.emplace_back(it->second);
+        }
+    } else if (comparator == GREATER_THAN || comparator == GREATER_THAN_EQUALS) {
+        // iter entries will be >= value, or end() if all entries are before value
+        auto iter_ge_value = int64map.lower_bound(value);
+
+        if(iter_ge_value == int64map.end()) {
+            return ;
+        }
+
+        if(comparator == GREATER_THAN && iter_ge_value->first == value) {
+            iter_ge_value++;
+        }
+
+        while(iter_ge_value != int64map.end()) {
+            raw_id_lists.emplace_back(iter_ge_value->second);
+            iter_ge_value++;
+        }
+    } else if(comparator == LESS_THAN || comparator == LESS_THAN_EQUALS) {
+        // iter entries will be >= value, or end() if all entries are before value
+        auto iter_ge_value = int64map.lower_bound(value);
+
+        auto it = int64map.begin();
+        while(it != iter_ge_value) {
+            raw_id_lists.emplace_back(it->second);
+            it++;
+        }
+
+        // for LESS_THAN_EQUALS, check if last iter entry is equal to value
+        if(it != int64map.end() && comparator == LESS_THAN_EQUALS && it->first == value) {
+            raw_id_lists.emplace_back(it->second);
+        }
+    }
+
+    std::vector id_lists;
+    ids_t::to_expanded_id_lists(raw_id_lists, id_lists, expanded_id_lists);
+
+    for (const auto &id_list: id_lists) {
+        id_list_iterators.emplace_back(id_list->new_iterator());
+    }
+}
+
 void num_tree_t::approx_search_count(NUM_COMPARATOR comparator, int64_t value, uint32_t& ids_len) {
     if (int64map.empty()) {
         return;