diff --git a/include/id_list.h b/include/id_list.h index ad890119..f5e2f3ba 100644 --- a/include/id_list.h +++ b/include/id_list.h @@ -154,6 +154,8 @@ public: uint32_t* uncompress(); void uncompress(std::vector& data); + + size_t intersect_count(const uint32_t* res_ids, size_t res_ids_len); }; template diff --git a/include/ids_t.h b/include/ids_t.h index 949c71b8..cec677f7 100644 --- a/include/ids_t.h +++ b/include/ids_t.h @@ -35,7 +35,7 @@ struct compact_id_list_t { [[nodiscard]] uint32_t num_ids() const; - bool contains_atleast_one(const uint32_t* target_ids, size_t target_ids_size); + size_t intersect_count(const uint32_t* res_ids, size_t res_ids_len); }; class ids_t { @@ -90,8 +90,6 @@ public: static bool contains(const void* obj, uint32_t id); - static bool contains_atleast_one(const void* obj, const uint32_t* target_ids, size_t target_ids_size); - static void merge(const std::vector& id_lists, std::vector& result_ids); static void intersect(const std::vector& id_lists, std::vector& result_ids); @@ -100,6 +98,8 @@ public: static void uncompress(void*& obj, std::vector& ids); + static size_t intersect_count(void*& obj, const uint32_t* result_ids, size_t result_ids_len); + static void to_expanded_id_lists(const std::vector& raw_id_lists, std::vector& id_lists, std::vector& expanded_id_lists); }; diff --git a/src/facet_index.cpp b/src/facet_index.cpp index b6eb36b2..7af3a4da 100644 --- a/src/facet_index.cpp +++ b/src/facet_index.cpp @@ -165,7 +165,6 @@ size_t facet_index_t::intersect(const std::string& field, const uint32_t* result size_t max_facets = is_wildcard_no_filter_query ? 
std::min((size_t)max_facet_count, counter_list.size()) : std::min((size_t)2 * max_facet_count, counter_list.size()); - std::vector id_list; for(const auto& facet_count : counter_list) { //LOG(INFO) << "checking ids in facet_value " << facet_count.facet_value << " having total count " // << facet_count.count << ", is_wildcard_no_filter_query: " << is_wildcard_no_filter_query; @@ -178,13 +177,7 @@ size_t facet_index_t::intersect(const std::string& field, const uint32_t* result if(!ids) { continue; } - - ids_t::uncompress(ids, id_list); - uint32_t* out = nullptr; - count = ArrayUtils::and_scalar(id_list.data(), id_list.size(), result_ids, result_ids_len, &out); - delete[] out; - - id_list.clear(); + count = ids_t::intersect_count(ids, result_ids, result_ids_len); } if(count) { diff --git a/src/id_list.cpp b/src/id_list.cpp index 82712368..0710873e 100644 --- a/src/id_list.cpp +++ b/src/id_list.cpp @@ -656,3 +656,24 @@ uint32_t* id_list_t::uncompress() { return arr; } + +size_t id_list_t::intersect_count(const uint32_t *res_ids, size_t res_ids_len) { + size_t count = 0; + size_t res_index = 0; + auto it = new_iterator(); + + while(it.valid() && res_index < res_ids_len) { + if(it.id() < res_ids[res_index]) { + it.skip_to(res_ids[res_index]); + } else if(it.id() > res_ids[res_index]) { + // jump to the index of the first result id that is >= it.id(), or res_ids_len if there is no such element.
+ res_index = std::lower_bound(res_ids + res_index, res_ids + res_ids_len, it.id()) - res_ids; + } else { + it.next(); + res_index++; + count++; + } + } + + return count; +} diff --git a/src/ids_t.cpp b/src/ids_t.cpp index 082d01f5..7ee463a3 100644 --- a/src/ids_t.cpp +++ b/src/ids_t.cpp @@ -145,34 +145,27 @@ bool compact_id_list_t::contains(uint32_t id) { return false; } -bool compact_id_list_t::contains_atleast_one(const uint32_t* target_ids, size_t target_ids_size) { +size_t compact_id_list_t::intersect_count(const uint32_t* res_ids, size_t res_ids_len) { + size_t count = 0; size_t i = 0; - size_t target_ids_index = 0; + size_t res_index = 0; - while(i < length && target_ids_index < target_ids_size) { - size_t num_existing_offsets = ids[i]; - size_t existing_id = ids[i + num_existing_offsets + 1]; + while(i < length && res_index < res_ids_len) { + size_t curr_id = ids[i]; - // Returns iterator to the first element that is >= to value or last if no such element is found. - size_t found_index = std::lower_bound(target_ids + target_ids_index, - target_ids + target_ids_size, existing_id) - target_ids; - - if(found_index == target_ids_size) { - // all elements are lesser than lowest value (existing_id), so we can stop looking - return false; + if(curr_id < res_ids[res_index]) { + i++; + } else if(curr_id > res_ids[res_index]) { + // jump to the index of the first result id that is >= curr_id, or res_ids_len if there is no such element.
+ res_index = std::lower_bound(res_ids + res_index, res_ids + res_ids_len, curr_id) - res_ids; } else { - if(target_ids[found_index] == existing_id) { - return true; - } - - // adjust lower bound to found_index+1 whose value is >= `existing_id` - target_ids_index = found_index; + i++; + res_index++; + count++; } - - i += num_existing_offsets + 2; } - return false; + return count; } /* posting operations */ @@ -289,16 +282,6 @@ bool ids_t::contains(const void* obj, uint32_t id) { } } -bool ids_t::contains_atleast_one(const void* obj, const uint32_t* target_ids, size_t target_ids_size) { - if(IS_COMPACT_IDS(obj)) { - compact_id_list_t* list = COMPACT_IDS_PTR(obj); - return list->contains_atleast_one(target_ids, target_ids_size); - } else { - id_list_t* list = (id_list_t*)(obj); - return list->contains_atleast_one(target_ids, target_ids_size); - } -} - void ids_t::merge(const std::vector& raw_posting_lists, std::vector& result_ids) { // we will have to convert the compact posting list (if any) to full form std::vector id_lists; @@ -382,6 +365,16 @@ void ids_t::uncompress(void*& obj, std::vector& ids) { } } +size_t ids_t::intersect_count(void*& obj, const uint32_t* result_ids, size_t result_ids_len) { + if(IS_COMPACT_IDS(obj)) { + compact_id_list_t* list = COMPACT_IDS_PTR(obj); + return list->intersect_count(result_ids, result_ids_len); + } else { + id_list_t* list = (id_list_t*)(obj); + return list->intersect_count(result_ids, result_ids_len); + } +} + void ids_t::block_intersector_t::split_lists(size_t concurrency, std::vector>& partial_its_vec) { const size_t num_blocks = this->id_lists[0]->num_blocks();