diff --git a/include/filter_result_iterator.h b/include/filter_result_iterator.h index f0d26c82..25f05ad2 100644 --- a/include/filter_result_iterator.h +++ b/include/filter_result_iterator.h @@ -14,13 +14,39 @@ struct reference_filter_result_t { uint32_t count = 0; uint32_t* docs = nullptr; - reference_filter_result_t& operator=(const reference_filter_result_t& obj) noexcept { - if (&obj == this) - return *this; + explicit reference_filter_result_t(uint32_t count = 0, uint32_t* docs = nullptr) : count(count), docs(docs) {} + + reference_filter_result_t(const reference_filter_result_t& obj) { + if (&obj == this) { + return; + } count = obj.count; docs = new uint32_t[count]; memcpy(docs, obj.docs, count * sizeof(uint32_t)); + } + + reference_filter_result_t& operator=(const reference_filter_result_t& obj) noexcept { + if (&obj == this) { + return *this; + } + + count = obj.count; + docs = new uint32_t[count]; + memcpy(docs, obj.docs, count * sizeof(uint32_t)); + + return *this; + } + + reference_filter_result_t& operator=(reference_filter_result_t&& obj) noexcept { + if (&obj == this) { + return *this; + } + + count = obj.count; + docs = obj.docs; + + obj.docs = nullptr; return *this; } @@ -41,8 +67,9 @@ struct single_filter_result_t { seq_id(seq_id), reference_filter_results(std::move(reference_filter_results)) {} single_filter_result_t(const single_filter_result_t& obj) { - if (&obj == this) + if (&obj == this) { return; + } seq_id = obj.seq_id; @@ -58,74 +85,63 @@ struct filter_result_t { uint32_t count = 0; uint32_t* docs = nullptr; // Collection name -> Reference filter result - std::map reference_filter_results; + std::map* coll_to_references = nullptr; filter_result_t() = default; filter_result_t(uint32_t count, uint32_t* docs) : count(count), docs(docs) {} filter_result_t(const filter_result_t& obj) { - if (&obj == this) + if (&obj == this) { return; + } count = obj.count; docs = new uint32_t[count]; memcpy(docs, obj.docs, count * sizeof(uint32_t)); - // Copy every collection's references. - for (const auto &item: obj.reference_filter_results) { - auto& ref_coll_name = item.first; - reference_filter_results[ref_coll_name] = new reference_filter_result_t[count]; - for (uint32_t i = 0; i < count; i++) { - reference_filter_results[ref_coll_name][i] = item.second[i]; - } - } + copy_references(obj, *this); } filter_result_t& operator=(const filter_result_t& obj) noexcept { - if (&obj == this) + if (&obj == this) { return *this; + } count = obj.count; docs = new uint32_t[count]; memcpy(docs, obj.docs, count * sizeof(uint32_t)); - // Copy every collection's references. - for (const auto &item: obj.reference_filter_results) { - reference_filter_results[item.first] = new reference_filter_result_t[count]; - - for (uint32_t i = 0; i < count; i++) { - reference_filter_results[item.first][i] = item.second[i]; - } - } + copy_references(obj, *this); return *this; } filter_result_t& operator=(filter_result_t&& obj) noexcept { - if (&obj == this) + if (&obj == this) { return *this; + } count = obj.count; docs = obj.docs; - reference_filter_results = std::map(obj.reference_filter_results); + coll_to_references = obj.coll_to_references; obj.docs = nullptr; - obj.reference_filter_results.clear(); + obj.coll_to_references = nullptr; return *this; } ~filter_result_t() { delete[] docs; - for (const auto &item: reference_filter_results) { - delete[] item.second; - } + delete[] coll_to_references; } static void and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result); static void or_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result); + + static void copy_references(const filter_result_t& from, filter_result_t& to); }; class filter_result_iterator_t { @@ -175,7 +191,8 @@ private: explicit filter_result_iterator_t(uint32_t approx_filter_ids_length); /// Collects n doc ids while advancing the iterator. The iterator may become invalid during this operation. - void get_n_ids(const uint32_t& n, filter_result_t& result); + /// **The references are moved from filter_result_iterator_t. + void get_n_ids(const uint32_t& n, filter_result_t*& result); public: uint32_t seq_id = 0; @@ -221,11 +238,11 @@ public: void next(); /// Collects n doc ids while advancing the iterator. The ids present in excluded_result_ids are ignored. The - /// iterator may become invalid during this operation. + /// iterator may become invalid during this operation. **The references are moved from filter_result_iterator_t. void get_n_ids(const uint32_t& n, uint32_t& excluded_result_index, uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size, - filter_result_t& result); + filter_result_t*& result); /// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during /// this operation. diff --git a/src/filter_result_iterator.cpp b/src/filter_result_iterator.cpp index a3e0650a..7f4a45b9 100644 --- a/src/filter_result_iterator.cpp +++ b/src/filter_result_iterator.cpp @@ -13,6 +13,23 @@ #include "posting.h" #include "collection_manager.h" +void filter_result_t::copy_references(const filter_result_t& from, filter_result_t& to) { + if (from.coll_to_references == nullptr) { + return; + } + + auto const& count = from.count; + to.coll_to_references = new std::map[count] {}; + for (uint32_t i = 0; i < count; i++) { + if (from.coll_to_references[i].empty()) { + continue; + } + + auto& ref = to.coll_to_references[i]; + ref.insert(from.coll_to_references[i].begin(), from.coll_to_references[i].end()); + } +} + void filter_result_t::and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) { auto lenA = a.count, lenB = b.count; if (lenA == 0 || lenB == 0) { @@ -25,16 +42,8 @@ void filter_result_t::and_filter_results(const filter_result_t& a, const filter_ const uint32_t *endA = A + lenA; const uint32_t *endB = B + lenB; - // Add an entry of references in the result for each unique collection in a and b. - for (auto const& item: a.reference_filter_results) { - if (result.reference_filter_results.count(item.first) == 0) { - result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)]; - } - } - for (auto const& item: b.reference_filter_results) { - if (result.reference_filter_results.count(item.first) == 0) { - result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)]; - } + if (a.coll_to_references != nullptr || b.coll_to_references != nullptr) { + result.coll_to_references = new std::map[std::min(lenA, lenB)] {}; } while (true) { @@ -54,12 +63,15 @@ void filter_result_t::and_filter_results(const filter_result_t& a, const filter_ if (*A == *B) { *out = *A; - // Copy the references of the document from every collection into result. - for (auto const& item: a.reference_filter_results) { - result.reference_filter_results[item.first][out - result.docs] = item.second[A - a.docs]; - } - for (auto const& item: b.reference_filter_results) { - result.reference_filter_results[item.first][out - result.docs] = item.second[B - b.docs]; + if (result.coll_to_references != nullptr) { + // Copy the references of the document from every collection into result. + auto& ref = result.coll_to_references[out - result.docs]; + if (a.coll_to_references != nullptr) { + ref.insert(a.coll_to_references[A - a.docs].begin(), a.coll_to_references[A - a.docs].end()); + } + if (b.coll_to_references != nullptr) { + ref.insert(b.coll_to_references[B - b.docs].begin(), b.coll_to_references[B - b.docs].end()); + } } out++; @@ -92,16 +104,8 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r size_t indexA = 0, indexB = 0, res_index = 0, lenA = a.count, lenB = b.count; result.docs = new uint32_t[lenA + lenB]; - // Add an entry of references in the result for each unique collection in a and b. - for (auto const& item: a.reference_filter_results) { - if (result.reference_filter_results.count(item.first) == 0) { - result.reference_filter_results[item.first] = new reference_filter_result_t[lenA + lenB]; - } - } - for (auto const& item: b.reference_filter_results) { - if (result.reference_filter_results.count(item.first) == 0) { - result.reference_filter_results[item.first] = new reference_filter_result_t[lenA + lenB]; - } + if (a.coll_to_references != nullptr || b.coll_to_references != nullptr) { + result.coll_to_references = new std::map[lenA + lenB] {}; } while (indexA < lenA && indexB < lenB) { @@ -112,9 +116,10 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r res_index++; } - // Copy references of the last result document from every collection in a. - for (auto const& item: a.reference_filter_results) { - result.reference_filter_results[item.first][res_index - 1] = item.second[indexA]; + if (a.coll_to_references != nullptr) { + // Copy references of the last result document from every collection in a. + auto &ref = result.coll_to_references[res_index - 1]; + ref.insert(a.coll_to_references[indexA].begin(), a.coll_to_references[indexA].end()); } indexA++; @@ -124,8 +129,9 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r res_index++; } - for (auto const& item: b.reference_filter_results) { - result.reference_filter_results[item.first][res_index - 1] = item.second[indexB]; + if (b.coll_to_references != nullptr) { + auto &ref = result.coll_to_references[res_index - 1]; + ref.insert(b.coll_to_references[indexB].begin(), b.coll_to_references[indexB].end()); } indexB++; @@ -138,8 +144,9 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r res_index++; } - for (auto const& item: a.reference_filter_results) { - result.reference_filter_results[item.first][res_index - 1] = item.second[indexA]; + if (a.coll_to_references != nullptr) { + auto &ref = result.coll_to_references[res_index - 1]; + ref.insert(a.coll_to_references[indexA].begin(), a.coll_to_references[indexA].end()); } indexA++; @@ -151,8 +158,9 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r res_index++; } - for (auto const& item: b.reference_filter_results) { - result.reference_filter_results[item.first][res_index - 1] = item.second[indexB]; + if (b.coll_to_references != nullptr) { + auto &ref = result.coll_to_references[res_index - 1]; + ref.insert(b.coll_to_references[indexB].begin(), b.coll_to_references[indexB].end()); } indexB++; @@ -160,21 +168,27 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r result.count = res_index; + if (res_index == lenA + lenB) { + return; + } + // shrink fit auto out = new uint32_t[res_index]; memcpy(out, result.docs, res_index * sizeof(uint32_t)); delete[] result.docs; result.docs = out; - for (auto &item: result.reference_filter_results) { - auto out_references = new reference_filter_result_t[res_index]; - - for (uint32_t i = 0; i < result.count; i++) { - out_references[i] = item.second[i]; - } - delete[] item.second; - item.second = out_references; + if (result.coll_to_references == nullptr) { + return; } + + auto out_references = new std::map[res_index] {}; + for (uint32_t i = 0; i < res_index; i++) { + auto& ref = out_references[i]; + ref.insert(result.coll_to_references[i].begin(), result.coll_to_references[i].end()); + } + + result.coll_to_references = out_references; } void filter_result_iterator_t::and_filter_iterators() { @@ -410,8 +424,9 @@ void filter_result_iterator_t::next() { seq_id = filter_result.docs[result_index]; reference.clear(); - for (auto const& item: filter_result.reference_filter_results) { - reference[item.first] = item.second[result_index]; + if (filter_result.coll_to_references != nullptr) { + auto& ref = filter_result.coll_to_references[result_index]; + reference.insert(ref.begin(), ref.end()); } return; @@ -666,8 +681,9 @@ void filter_result_iterator_t::init() { } seq_id = filter_result.docs[result_index]; - for (auto const& item: filter_result.reference_filter_results) { - reference[item.first] = item.second[result_index]; + if (filter_result.coll_to_references != nullptr) { + auto& ref = filter_result.coll_to_references[result_index]; + reference.insert(ref.begin(), ref.end()); } is_filter_result_initialized = true; @@ -1141,8 +1157,9 @@ void filter_result_iterator_t::skip_to(uint32_t id) { seq_id = filter_result.docs[result_index]; reference.clear(); - for (auto const& item: filter_result.reference_filter_results) { - reference[item.first] = item.second[result_index]; + if (filter_result.coll_to_references != nullptr) { + auto& ref = filter_result.coll_to_references[result_index]; + reference.insert(ref.begin(), ref.end()); } return; @@ -1374,8 +1391,9 @@ void filter_result_iterator_t::reset() { seq_id = filter_result.docs[result_index]; reference.clear(); - for (auto const& item: filter_result.reference_filter_results) { - reference[item.first] = item.second[result_index]; + if (filter_result.coll_to_references != nullptr) { + auto& ref = filter_result.coll_to_references[result_index]; + reference.insert(ref.begin(), ref.end()); } is_valid = true; @@ -1480,7 +1498,7 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len return; } - if (filter_result.reference_filter_results.empty()) { + if (filter_result.coll_to_references == nullptr) { if (is_filter_result_initialized) { result.count = ArrayUtils::and_scalar(A, lenA, filter_result.docs, filter_result.count, &result.docs); return; @@ -1528,16 +1546,15 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len result.count = match_indexes.size(); result.docs = new uint32_t[match_indexes.size()]; - for (auto const& item: filter_result.reference_filter_results) { - result.reference_filter_results[item.first] = new reference_filter_result_t[match_indexes.size()]; - } + result.coll_to_references = new std::map[match_indexes.size()] {}; for (uint32_t i = 0; i < match_indexes.size(); i++) { auto const& match_index = match_indexes[i]; result.docs[i] = filter_result.docs[match_index]; - for (auto const& item: filter_result.reference_filter_results) { - result.reference_filter_results[item.first][i] = item.second[match_index]; - } + + auto& result_reference = result.coll_to_references[i]; + result_reference.insert(filter_result.coll_to_references[match_index].begin(), + filter_result.coll_to_references[match_index].end()); } } @@ -1618,22 +1635,28 @@ filter_result_iterator_t &filter_result_iterator_t::operator=(filter_result_iter return *this; } -void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t& result) { +void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t*& result) { if (!is_filter_result_initialized) { return; } - auto result_length = result.count = std::min(n, filter_result.count - result_index); - result.docs = new uint32_t[result_length]; - for (const auto &item: filter_result.reference_filter_results) { - result.reference_filter_results[item.first] = new reference_filter_result_t[result_length]; + auto result_length = result->count = std::min(n, filter_result.count - result_index); + result->docs = new uint32_t[result_length]; + if (filter_result.coll_to_references != nullptr) { + result->coll_to_references = new std::map[result_length] {}; } for (uint32_t i = 0; i < result_length; i++, result_index++) { - result.docs[i] = filter_result.docs[result_index]; - for (const auto &item: filter_result.reference_filter_results) { - result.reference_filter_results[item.first][i] = item.second[result_index]; + result->docs[i] = filter_result.docs[result_index]; + + if (filter_result.coll_to_references == nullptr) { + continue; } + + auto& result_reference = result->coll_to_references[i]; + // Moving references since get_n_ids is only called in wildcard search flow and filter_result_iterator is + // not used afterwards. + result_reference = std::move(filter_result.coll_to_references[result_index]); } is_valid = result_index < filter_result.count; @@ -1642,7 +1665,7 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t& res void filter_result_iterator_t::get_n_ids(const uint32_t& n, uint32_t& excluded_result_index, uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size, - filter_result_t& result) { + filter_result_t*& result) { if (excluded_result_ids == nullptr || excluded_result_ids_size == 0 || excluded_result_index >= excluded_result_ids_size) { return get_n_ids(n, result); @@ -1663,18 +1686,24 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n, } } - result.count = match_indexes.size(); - result.docs = new uint32_t[match_indexes.size()]; - for (auto const& item: filter_result.reference_filter_results) { - result.reference_filter_results[item.first] = new reference_filter_result_t[match_indexes.size()]; + result->count = match_indexes.size(); + result->docs = new uint32_t[match_indexes.size()]; + if (filter_result.coll_to_references != nullptr) { + result->coll_to_references = new std::map[match_indexes.size()] {}; } for (uint32_t i = 0; i < match_indexes.size(); i++) { auto const& match_index = match_indexes[i]; - result.docs[i] = filter_result.docs[match_index]; - for (auto const& item: filter_result.reference_filter_results) { - result.reference_filter_results[item.first][i] = item.second[match_index]; + result->docs[i] = filter_result.docs[match_index]; + + if (filter_result.coll_to_references == nullptr) { + continue; } + + auto& result_reference = result->coll_to_references[i]; + // Moving references since get_n_ids is only called in wildcard search flow and filter_result_iterator is + // not used afterwards. + result_reference = std::move(filter_result.coll_to_references[match_index]); } is_valid = result_index < filter_result.count; diff --git a/src/index.cpp b/src/index.cpp index af0b9beb..490b044f 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -1,5 +1,6 @@ #include "index.h" +#include #include #include #include @@ -1697,29 +1698,61 @@ Option Index::do_reference_filtering_with_lock(filter_node_t* const filter uint32_t count = filter_result_iterator.to_filter_id_array(reference_docs); std::unique_ptr docs_guard(reference_docs); - // doc id -> reference doc ids - std::map> reference_map; - for (uint32_t i = 0; i < count; i++) { - auto reference_doc_id = reference_docs[i]; - auto doc_id = sort_index.at(reference_helper_field_name)->at(reference_doc_id); - - reference_map[doc_id].push_back(reference_doc_id); + if (count == 0) { + return Option(true); } - filter_result.count = reference_map.size(); - filter_result.docs = new uint32_t[reference_map.size()]; - filter_result.reference_filter_results[collection_name] = new reference_filter_result_t[reference_map.size()]; + // Collect all the doc ids from the reference ids. + std::vector> id_pairs; + std::unordered_set unique_doc_ids; + auto const& ref_index = *sort_index.at(reference_helper_field_name); + for (uint32_t i = 0; i < count; i++) { + auto& reference_doc_id = reference_docs[i]; + auto doc_id = ref_index.at(reference_doc_id); - size_t doc_index = 0; - for (auto &item: reference_map) { - filter_result.docs[doc_index] = item.first; + id_pairs.emplace_back(std::pair(doc_id, reference_doc_id)); + unique_doc_ids.insert(doc_id); + } - auto& reference_result = filter_result.reference_filter_results[collection_name][doc_index]; - reference_result.count = item.second.size(); - reference_result.docs = new uint32_t[item.second.size()]; - std::copy(item.second.begin(), item.second.end(), reference_result.docs); + std::sort(id_pairs.begin(), id_pairs.end(), [](auto const& left, auto const& right) { + return left.first < right.first; + }); - doc_index++; + filter_result.count = unique_doc_ids.size(); + filter_result.docs = new uint32_t[unique_doc_ids.size()]; + filter_result.coll_to_references = new std::map[unique_doc_ids.size()] {}; + + std::vector previous_doc_references; + for (uint32_t i = 0, previous_doc = id_pairs[0].first + 1, result_index = 0; i < id_pairs.size(); i++) { + auto const& current_doc = id_pairs[i].first; + auto const& reference_doc_id = id_pairs[i].second; + + if (current_doc != previous_doc) { + filter_result.docs[result_index] = current_doc; + if (result_index > 0) { + auto& reference_result = filter_result.coll_to_references[result_index - 1]; + + auto r = reference_filter_result_t(previous_doc_references.size(), new uint32_t[previous_doc_references.size()]); + std::copy(previous_doc_references.begin(), previous_doc_references.end(), r.docs); + reference_result[collection_name] = std::move(r); + + previous_doc_references.clear(); + } + + result_index++; + previous_doc = current_doc; + previous_doc_references.push_back(reference_doc_id); + } else { + previous_doc_references.push_back(reference_doc_id); + } + } + + if (!previous_doc_references.empty()) { + auto& reference_result = filter_result.coll_to_references[filter_result.count - 1]; + + auto r = reference_filter_result_t(previous_doc_references.size(), new uint32_t[previous_doc_references.size()]); + std::copy(previous_doc_references.begin(), previous_doc_references.end(), r.docs); + reference_result[collection_name] = std::move(r); } return Option(true); @@ -2287,6 +2320,7 @@ Option Index::search(std::vector& field_query_tokens, cons filter_iterator_guard.reset(filter_result_iterator); if (!do_phrase_search_op.ok()) { + delete [] all_result_ids; return do_phrase_search_op; } @@ -4692,9 +4726,9 @@ Option Index::do_infix_search(const size_t num_search_fields, const std::v for(size_t i = 0; i < raw_infix_ids_length; i++) { auto seq_id = raw_infix_ids[i]; - std::map, reference_filter_result_t> references; - for (const auto& item: filtered_infix_ids.reference_filter_results) { - references[item.first] = item.second[i]; + std::map references; + if (filtered_infix_ids.coll_to_references != nullptr) { + references = std::move(filtered_infix_ids.coll_to_references[i]); } int64_t match_score = 0; @@ -5049,9 +5083,9 @@ Option Index::search_wildcard(filter_node_t const* const& filter_tree_root Option* compute_sort_score_statuses[num_threads]; for(size_t thread_id = 0; thread_id < num_threads && filter_result_iterator->is_valid; thread_id++) { - filter_result_t batch_result; + auto batch_result = new filter_result_t(); filter_result_iterator->get_n_ids(window_size, excluded_result_index, exclude_token_ids, - exclude_token_ids_size, batch_result); + exclude_token_ids_size, batch_result); num_queued++; @@ -5067,6 +5101,7 @@ Option Index::search_wildcard(filter_node_t const* const& filter_tree_root check_for_circuit_break, batch_result, &num_processed, &m_process, &cv_process, &compute_sort_score_status, collection_name]() { + std::unique_ptr batch_result_guard(batch_result); search_begin_us = parent_search_begin; search_stop_us = parent_search_stop_ms; @@ -5074,11 +5109,11 @@ Option Index::search_wildcard(filter_node_t const* const& filter_tree_root size_t filter_index = 0; - for(size_t i = 0; i < batch_result.count; i++) { - const uint32_t seq_id = batch_result.docs[i]; + for(size_t i = 0; i < batch_result->count; i++) { + const uint32_t seq_id = batch_result->docs[i]; std::map, reference_filter_result_t> references; - for (const auto& item: batch_result.reference_filter_results) { - references[item.first] = item.second[i]; + if (batch_result->coll_to_references != nullptr) { + references = std::move(batch_result->coll_to_references[i]); } int64_t match_score = 0; diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp index 212e720e..495f9ba4 100644 --- a/test/collection_join_test.cpp +++ b/test/collection_join_test.cpp @@ -651,7 +651,7 @@ TEST_F(CollectionJoinTest, AndFilterResults_NoReference) { filter_result_t::and_filter_results(a, b, result); ASSERT_EQ(2, result.count); - ASSERT_EQ(0, result.reference_filter_results.size()); + ASSERT_EQ(nullptr, result.coll_to_references); std::vector docs = {3, 6}; @@ -664,30 +664,31 @@ TEST_F(CollectionJoinTest, AndFilterResults_WithReferences) { filter_result_t a; a.count = 9; a.docs = new uint32_t[a.count]; - a.reference_filter_results["foo"] = new reference_filter_result_t[a.count]; + a.coll_to_references = new std::map[a.count] {}; + for (size_t i = 0; i < a.count; i++) { a.docs[i] = i; + auto& reference = a.coll_to_references[i]; // Having only one reference of each document for brevity. - auto& reference = a.reference_filter_results["foo"][i]; - reference.count = 1; - reference.docs = new uint32_t[1]; - reference.docs[0] = 10 - i; + auto reference_docs = new uint32_t[1]; + reference_docs[0] = 10 - i; + reference["foo"] = reference_filter_result_t(1, reference_docs); } filter_result_t b; b.count = 0; uint32_t limit = 10; b.docs = new uint32_t[limit]; - b.reference_filter_results["bar"] = new reference_filter_result_t[limit]; + b.coll_to_references = new std::map[limit] {}; for (size_t i = 2; i < limit; i++) { if (i % 3 == 0) { b.docs[b.count] = i; - auto& reference = b.reference_filter_results["bar"][b.count++]; - reference.count = 1; - reference.docs = new uint32_t[1]; - reference.docs[0] = 2 * i; + auto& reference = b.coll_to_references[b.count++]; + auto reference_docs = new uint32_t[1]; + reference_docs[0] = 2 * i; + reference["bar"] = reference_filter_result_t(1, reference_docs); } } @@ -696,9 +697,9 @@ TEST_F(CollectionJoinTest, AndFilterResults_WithReferences) { filter_result_t::and_filter_results(a, b, result); ASSERT_EQ(2, result.count); - ASSERT_EQ(2, result.reference_filter_results.size()); - ASSERT_EQ(1, result.reference_filter_results.count("foo")); - ASSERT_EQ(1, result.reference_filter_results.count("bar")); + ASSERT_EQ(2, result.coll_to_references[0].size()); + ASSERT_EQ(1, result.coll_to_references[0].count("foo")); + ASSERT_EQ(1, result.coll_to_references[0].count("bar")); std::vector docs = {3, 6}, foo_reference = {7, 4}, bar_reference = {6, 12}; @@ -706,10 +707,10 @@ TEST_F(CollectionJoinTest, AndFilterResults_WithReferences) { ASSERT_EQ(docs[i], result.docs[i]); // result should contain correct references to the foo and bar collection. - ASSERT_EQ(1, result.reference_filter_results["foo"][i].count); - ASSERT_EQ(foo_reference[i], result.reference_filter_results["foo"][i].docs[0]); - ASSERT_EQ(1, result.reference_filter_results["bar"][i].count); - ASSERT_EQ(bar_reference[i], result.reference_filter_results["bar"][i].docs[0]); + ASSERT_EQ(1, result.coll_to_references[i].at("foo").count); + ASSERT_EQ(foo_reference[i], result.coll_to_references[i].at("foo").docs[0]); + ASSERT_EQ(1, result.coll_to_references[i].at("bar").count); + ASSERT_EQ(bar_reference[i], result.coll_to_references[i].at("bar").docs[0]); } } @@ -728,7 +729,7 @@ TEST_F(CollectionJoinTest, OrFilterResults_NoReference) { filter_result_t result1; filter_result_t::or_filter_results(a, b, result1); ASSERT_EQ(3, result1.count); - ASSERT_EQ(0, result1.reference_filter_results.size()); + ASSERT_EQ(nullptr, result1.coll_to_references); std::vector expected = {3, 6, 9}; for (size_t i = 0; i < result1.count; i++) { @@ -745,7 +746,7 @@ TEST_F(CollectionJoinTest, OrFilterResults_NoReference) { filter_result_t result2; filter_result_t::or_filter_results(a, b, result2); ASSERT_EQ(10, result2.count); - ASSERT_EQ(0, result2.reference_filter_results.size()); + ASSERT_EQ(nullptr, result2.coll_to_references); expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; for (size_t i = 0; i < result2.count; i++) { @@ -765,7 +766,7 @@ TEST_F(CollectionJoinTest, OrFilterResults_NoReference) { // b.docs: [0..8], c.docs: [0, 4, 5] filter_result_t::or_filter_results(b, c, result3); ASSERT_EQ(9, result3.count); - ASSERT_EQ(0, result3.reference_filter_results.size()); + ASSERT_EQ(nullptr, result3.coll_to_references); expected = {0, 1, 2, 3, 4, 5, 6, 7, 8}; for(size_t i = 0; i < result3.count; i++) { @@ -779,15 +780,15 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) { a.count = 0; a.docs = new uint32_t[limit]; - a.reference_filter_results["foo"] = new reference_filter_result_t[limit]; + a.coll_to_references = new std::map[limit] {}; for (size_t i = 2; i < limit; i++) { if (i % 3 == 0) { a.docs[a.count] = i; - auto& reference = a.reference_filter_results["foo"][a.count++]; - reference.count = 1; - reference.docs = new uint32_t[1]; - reference.docs[0] = 2 * i; + auto& reference = a.coll_to_references[a.count++]; + auto reference_docs = new uint32_t[1]; + reference_docs[0] = 2 * i; + reference["foo"] = reference_filter_result_t(1, reference_docs); } } @@ -796,27 +797,27 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) { filter_result_t::or_filter_results(a, b, result1); ASSERT_EQ(3, result1.count); - ASSERT_EQ(1, result1.reference_filter_results.size()); - ASSERT_EQ(1, result1.reference_filter_results.count("foo")); + ASSERT_EQ(1, result1.coll_to_references[0].size()); + ASSERT_EQ(1, result1.coll_to_references[0].count("foo")); std::vector expected = {3, 6, 9}, foo_reference = {6, 12, 18}; for (size_t i = 0; i < result1.count; i++) { ASSERT_EQ(expected[i], result1.docs[i]); - ASSERT_EQ(1, result1.reference_filter_results["foo"][i].count); - ASSERT_EQ(foo_reference[i], result1.reference_filter_results["foo"][i].docs[0]); + ASSERT_EQ(1, result1.coll_to_references[i].at("foo").count); + ASSERT_EQ(foo_reference[i], result1.coll_to_references[i].at("foo").docs[0]); } b.count = 9; b.docs = new uint32_t[b.count]; - b.reference_filter_results["bar"] = new reference_filter_result_t[b.count]; + b.coll_to_references = new std::map[b.count] {}; for (size_t i = 0; i < b.count; i++) { b.docs[i] = i; - auto& reference = b.reference_filter_results["bar"][i]; - reference.count = 1; - reference.docs = new uint32_t[1]; - reference.docs[0] = 10 - i; + auto& reference = b.coll_to_references[i]; + auto reference_docs = new uint32_t[1]; + reference_docs[0] = 10 - i; + reference["bar"] = reference_filter_result_t(1, reference_docs); } // a.docs: [3, 6, 9], b.docs: [0..8] @@ -833,18 +834,18 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) { ASSERT_EQ(expected[i], result2.docs[i]); if (foo_map.count(i) != 0) { - ASSERT_EQ(1, result2.reference_filter_results["foo"][i].count); - ASSERT_EQ(foo_map[i], result2.reference_filter_results["foo"][i].docs[0]); + ASSERT_EQ(1, result2.coll_to_references[i].at("foo").count); + ASSERT_EQ(foo_map[i], result2.coll_to_references[i].at("foo").docs[0]); } else { - // Reference count should be 0 for the docs that were not present in the a result. - ASSERT_EQ(0, result2.reference_filter_results["foo"][i].count); + // foo didn't have any reference to current doc. + ASSERT_EQ(0, result2.coll_to_references[i].count("foo")); } if (bar_map.count(i) != 0) { - ASSERT_EQ(1, result2.reference_filter_results["bar"][i].count); - ASSERT_EQ(bar_map[i], result2.reference_filter_results["bar"][i].docs[0]); + ASSERT_EQ(1, result2.coll_to_references[i].at("bar").count); + ASSERT_EQ(bar_map[i], result2.coll_to_references[i].at("bar").docs[0]); } else { - ASSERT_EQ(0, result2.reference_filter_results["bar"][i].count); + ASSERT_EQ(0, result2.coll_to_references[i].count("bar")); } } @@ -853,15 +854,15 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) { std::map baz_map = {{0, 2}, {4, 0}, {5, 8}}; c.count = baz_map.size(); c.docs = new uint32_t[baz_map.size()]; - c.reference_filter_results["baz"] = new reference_filter_result_t[baz_map.size()]; + c.coll_to_references = new std::map[baz_map.size()] {}; auto j = 0; for(auto i: baz_map) { c.docs[j] = i.first; - auto& reference = c.reference_filter_results["baz"][j++]; - reference.count = 1; - reference.docs = new uint32_t[1]; - reference.docs[0] = i.second; + auto& reference = c.coll_to_references[j++]; + auto reference_docs = new uint32_t[1]; + reference_docs[0] = i.second; + reference["baz"] = reference_filter_result_t(1, reference_docs); } // b.docs: [0..8], c.docs: [0, 4, 5] @@ -873,17 +874,17 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) { ASSERT_EQ(expected[i], result3.docs[i]); if (bar_map.count(i) != 0) { - ASSERT_EQ(1, result3.reference_filter_results["bar"][i].count); - ASSERT_EQ(bar_map[i], result3.reference_filter_results["bar"][i].docs[0]); + ASSERT_EQ(1, result3.coll_to_references[i].at("bar").count); + ASSERT_EQ(bar_map[i], result3.coll_to_references[i].at("bar").docs[0]); } else { - ASSERT_EQ(0, result3.reference_filter_results["bar"][i].count); + ASSERT_EQ(0, result3.coll_to_references[i].count("bar")); } if (baz_map.count(i) != 0) { - ASSERT_EQ(1, result3.reference_filter_results["baz"][i].count); - ASSERT_EQ(baz_map[i], result3.reference_filter_results["baz"][i].docs[0]); + ASSERT_EQ(1, result3.coll_to_references[i].at("baz").count); + ASSERT_EQ(baz_map[i], result3.coll_to_references[i].at("baz").docs[0]); } else { - ASSERT_EQ(0, result3.reference_filter_results["baz"][i].count); + ASSERT_EQ(0, result3.coll_to_references[i].count("baz")); } } }