From 2264dfb36413d7b01f2027b8dd60d3a3b6fa0705 Mon Sep 17 00:00:00 2001 From: Harpreet Sangar Date: Mon, 3 Jul 2023 20:41:45 +0530 Subject: [PATCH] Add `or_iterator_t::contains_atleast_one`. --- include/or_iterator.h | 2 + src/art.cpp | 39 +++++++++++++----- src/or_iterator.cpp | 94 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 11 deletions(-) diff --git a/include/or_iterator.h b/include/or_iterator.h index c4f27518..b10ceccf 100644 --- a/include/or_iterator.h +++ b/include/or_iterator.h @@ -50,6 +50,8 @@ public: template static bool intersect(std::vector& its, result_iter_state_t& istate, T func); + + static bool contains_atleast_one(std::vector& its, result_iter_state_t&& istate); }; template diff --git a/src/art.cpp b/src/art.cpp index 38519d16..60890d27 100644 --- a/src/art.cpp +++ b/src/art.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include "art.h" #include "logger.h" #include "array_utils.h" @@ -1000,6 +1001,24 @@ bool validate_and_add_leaf(art_leaf* leaf, const bool last_token, const std::str return true; } +void leaf_values_iterator(art_leaf*& leaf, std::vector& or_iterators, + std::vector& expanded_plists) { + if(IS_COMPACT_POSTING(leaf->values)) { + auto compact_posting_list = COMPACT_POSTING_PTR(leaf->values); + posting_list_t* full_posting_list = compact_posting_list->to_full_posting_list(); + expanded_plists.emplace_back(full_posting_list); + + std::vector its; + its.push_back(full_posting_list->new_iterator(nullptr, nullptr, 0)); + or_iterators.emplace_back(or_iterator_t(its)); + } else { + posting_list_t* full_posting_list = (posting_list_t*)(leaf->values); + std::vector its; + its.push_back(full_posting_list->new_iterator(nullptr, nullptr, 0)); + or_iterators.emplace_back(or_iterator_t(its)); + } +} + bool validate_and_add_leaf(art_leaf* leaf, const std::string& prev_token, const art_leaf* prev_leaf, const std::vector& prev_leaf_ids, @@ -1020,20 +1039,18 @@ bool validate_and_add_leaf(art_leaf* leaf, if (filter_result_iterator->is_valid && !filter_result_iterator->contains_atleast_one(leaf->values)) { return false; } - } else if (!filter_result_iterator->is_valid) { - if (!posting_t::contains_atleast_one(leaf->values, prev_leaf_ids.data(), prev_leaf_ids.size())) { - return false; - } } else { - std::vector leaf_ids; - leaf_ids.insert(leaf_ids.end(), prev_leaf_ids.begin(), prev_leaf_ids.end()); - posting_t::merge({leaf->values}, leaf_ids); + std::vector or_iterators; + std::vector expanded_plists; - bool found = false; - for (uint32_t i = 0; i < leaf_ids.size() && filter_result_iterator->is_valid && !found; i++) { - found = (filter_result_iterator->valid(leaf_ids[i]) == 1); + leaf_values_iterator(const_cast(prev_leaf), or_iterators, expanded_plists); + leaf_values_iterator(leaf, or_iterators, expanded_plists); + + auto found = or_iterator_t::contains_atleast_one(or_iterators, result_iter_state_t(nullptr, 0, filter_result_iterator)); + + for (auto& item: expanded_plists) { + delete item; } - if (!found) { return false; } diff --git a/src/or_iterator.cpp b/src/or_iterator.cpp index 5a88d68e..f2e72612 100644 --- a/src/or_iterator.cpp +++ b/src/or_iterator.cpp @@ -250,3 +250,97 @@ or_iterator_t::~or_iterator_t() noexcept { it.reset_cache(); } } + +bool or_iterator_t::contains_atleast_one(std::vector& its, result_iter_state_t&& istate) { + size_t it_size = its.size(); + bool is_excluded; + + switch (its.size()) { + case 0: + break; + case 1: + if(istate.is_filter_provided() && istate.is_filter_valid()) { + its[0].skip_to(istate.get_filter_id()); + } + + while(its.size() == it_size && its[0].valid()) { + auto id = its[0].id(); + if(take_id(istate, id, is_excluded)) { + return true; + } + + if(istate.is_filter_provided() && !is_excluded) { + if(istate.is_filter_valid()) { + // skip iterator till next id available in filter + its[0].skip_to(istate.get_filter_id()); + } else { + break; + } + } else { + its[0].next(); + } + } + break; + case 2: + if(istate.is_filter_provided() && istate.is_filter_valid()) { + its[0].skip_to(istate.get_filter_id()); + its[1].skip_to(istate.get_filter_id()); + } + + while(its.size() == it_size && !at_end2(its)) { + if(equals2(its)) { + auto id = its[0].id(); + if(take_id(istate, id, is_excluded)) { + return true; + } + + if(istate.is_filter_provided() != 0 && !is_excluded) { + if(istate.is_filter_valid()) { + // skip iterator till next id available in filter + its[0].skip_to(istate.get_filter_id()); + its[1].skip_to(istate.get_filter_id()); + } else { + break; + } + } else { + advance_all2(its); + } + } else { + advance_non_largest2(its); + } + } + break; + default: + if(istate.is_filter_provided() && istate.is_filter_valid()) { + for(auto& it: its) { + it.skip_to(istate.get_filter_id()); + } + } + + while(its.size() == it_size && !at_end(its)) { + if(equals(its)) { + auto id = its[0].id(); + if(take_id(istate, id, is_excluded)) { + return true; + } + + if(istate.is_filter_provided() && !is_excluded) { + if(istate.is_filter_valid()) { + // skip iterator till next id available in filter + for(auto& it: its) { + it.skip_to(istate.get_filter_id()); + } + } else { + break; + } + } else { + advance_all(its); + } + } else { + advance_non_largest(its); + } + } + } + + return false; +}