Merge pull request #1235 from happy-san/v0.26-facets

Refactor `filter_result_t::reference_filter_results`.
This commit is contained in:
Kishore Nallan 2023-09-25 12:44:01 +05:30 committed by GitHub
commit b69e2a2687
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 268 additions and 186 deletions

View File

@ -14,13 +14,39 @@ struct reference_filter_result_t {
uint32_t count = 0;
uint32_t* docs = nullptr;
reference_filter_result_t& operator=(const reference_filter_result_t& obj) noexcept {
if (&obj == this)
return *this;
explicit reference_filter_result_t(uint32_t count = 0, uint32_t* docs = nullptr) : count(count), docs(docs) {}
reference_filter_result_t(const reference_filter_result_t& obj) {
if (&obj == this) {
return;
}
count = obj.count;
docs = new uint32_t[count];
memcpy(docs, obj.docs, count * sizeof(uint32_t));
}
reference_filter_result_t& operator=(const reference_filter_result_t& obj) noexcept {
if (&obj == this) {
return *this;
}
count = obj.count;
docs = new uint32_t[count];
memcpy(docs, obj.docs, count * sizeof(uint32_t));
return *this;
}
reference_filter_result_t& operator=(reference_filter_result_t&& obj) noexcept {
if (&obj == this) {
return *this;
}
count = obj.count;
docs = obj.docs;
obj.docs = nullptr;
return *this;
}
@ -41,8 +67,9 @@ struct single_filter_result_t {
seq_id(seq_id), reference_filter_results(std::move(reference_filter_results)) {}
single_filter_result_t(const single_filter_result_t& obj) {
if (&obj == this)
if (&obj == this) {
return;
}
seq_id = obj.seq_id;
@ -58,74 +85,63 @@ struct filter_result_t {
uint32_t count = 0;
uint32_t* docs = nullptr;
// Collection name -> Reference filter result
std::map<std::string, reference_filter_result_t*> reference_filter_results;
std::map<std::string, reference_filter_result_t>* coll_to_references = nullptr;
filter_result_t() = default;
filter_result_t(uint32_t count, uint32_t* docs) : count(count), docs(docs) {}
filter_result_t(const filter_result_t& obj) {
if (&obj == this)
if (&obj == this) {
return;
}
count = obj.count;
docs = new uint32_t[count];
memcpy(docs, obj.docs, count * sizeof(uint32_t));
// Copy every collection's references.
for (const auto &item: obj.reference_filter_results) {
auto& ref_coll_name = item.first;
reference_filter_results[ref_coll_name] = new reference_filter_result_t[count];
for (uint32_t i = 0; i < count; i++) {
reference_filter_results[ref_coll_name][i] = item.second[i];
}
}
copy_references(obj, *this);
}
filter_result_t& operator=(const filter_result_t& obj) noexcept {
if (&obj == this)
if (&obj == this) {
return *this;
}
count = obj.count;
docs = new uint32_t[count];
memcpy(docs, obj.docs, count * sizeof(uint32_t));
// Copy every collection's references.
for (const auto &item: obj.reference_filter_results) {
reference_filter_results[item.first] = new reference_filter_result_t[count];
for (uint32_t i = 0; i < count; i++) {
reference_filter_results[item.first][i] = item.second[i];
}
}
copy_references(obj, *this);
return *this;
}
filter_result_t& operator=(filter_result_t&& obj) noexcept {
if (&obj == this)
if (&obj == this) {
return *this;
}
count = obj.count;
docs = obj.docs;
reference_filter_results = std::map(obj.reference_filter_results);
coll_to_references = obj.coll_to_references;
obj.docs = nullptr;
obj.reference_filter_results.clear();
obj.coll_to_references = nullptr;
return *this;
}
~filter_result_t() {
delete[] docs;
for (const auto &item: reference_filter_results) {
delete[] item.second;
}
delete[] coll_to_references;
}
static void and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result);
static void or_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result);
static void copy_references(const filter_result_t& from, filter_result_t& to);
};
class filter_result_iterator_t {
@ -175,7 +191,8 @@ private:
explicit filter_result_iterator_t(uint32_t approx_filter_ids_length);
/// Collects n doc ids while advancing the iterator. The iterator may become invalid during this operation.
void get_n_ids(const uint32_t& n, filter_result_t& result);
/// Note: the references are moved out of this filter_result_iterator_t into result.
void get_n_ids(const uint32_t& n, filter_result_t*& result);
public:
uint32_t seq_id = 0;
@ -221,11 +238,11 @@ public:
void next();
/// Collects n doc ids while advancing the iterator. The ids present in excluded_result_ids are ignored. The
/// iterator may become invalid during this operation.
/// iterator may become invalid during this operation. Note: the references are moved out of this filter_result_iterator_t into result.
void get_n_ids(const uint32_t& n,
uint32_t& excluded_result_index,
uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size,
filter_result_t& result);
filter_result_t*& result);
/// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during
/// this operation.

View File

@ -13,6 +13,23 @@
#include "posting.h"
#include "collection_manager.h"
/// Makes `to.coll_to_references` a deep copy of `from.coll_to_references`.
///
/// `from.coll_to_references`, when set, is read as an array indexed `0 .. from.count - 1`, each element being a
/// map of collection name -> reference filter result. Only `coll_to_references` is touched; `count` and `docs`
/// of `to` are left to the caller.
///
/// Any reference array already owned by `to` is released first, so calling this from the copy-assignment
/// operator neither leaks the previous array nor leaves `to` with stale references when `from` has none.
///
/// @param from Result whose references are copied.
/// @param to   Result receiving the copy; ends up with `nullptr` when `from` has no references.
void filter_result_t::copy_references(const filter_result_t& from, filter_result_t& to) {
    if (&from == &to) {
        // Nothing to copy, and releasing below would destroy the very data we are asked to read.
        return;
    }

    // `delete[]` on a null pointer is a no-op, so this is safe for freshly constructed objects too.
    delete[] to.coll_to_references;
    to.coll_to_references = nullptr;

    if (from.coll_to_references == nullptr) {
        return;
    }

    auto const& count = from.count;
    to.coll_to_references = new std::map<std::string, reference_filter_result_t>[count] {};

    for (uint32_t i = 0; i < count; i++) {
        auto const& source = from.coll_to_references[i];
        if (source.empty()) {
            continue;
        }

        // Range insert copy-constructs every reference_filter_result_t into the (empty) destination map.
        to.coll_to_references[i].insert(source.begin(), source.end());
    }
}
void filter_result_t::and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) {
auto lenA = a.count, lenB = b.count;
if (lenA == 0 || lenB == 0) {
@ -25,16 +42,8 @@ void filter_result_t::and_filter_results(const filter_result_t& a, const filter_
const uint32_t *endA = A + lenA;
const uint32_t *endB = B + lenB;
// Add an entry of references in the result for each unique collection in a and b.
for (auto const& item: a.reference_filter_results) {
if (result.reference_filter_results.count(item.first) == 0) {
result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
}
}
for (auto const& item: b.reference_filter_results) {
if (result.reference_filter_results.count(item.first) == 0) {
result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
}
if (a.coll_to_references != nullptr || b.coll_to_references != nullptr) {
result.coll_to_references = new std::map<std::string, reference_filter_result_t>[std::min(lenA, lenB)] {};
}
while (true) {
@ -54,12 +63,15 @@ void filter_result_t::and_filter_results(const filter_result_t& a, const filter_
if (*A == *B) {
*out = *A;
// Copy the references of the document from every collection into result.
for (auto const& item: a.reference_filter_results) {
result.reference_filter_results[item.first][out - result.docs] = item.second[A - a.docs];
}
for (auto const& item: b.reference_filter_results) {
result.reference_filter_results[item.first][out - result.docs] = item.second[B - b.docs];
if (result.coll_to_references != nullptr) {
// Copy the references of the document from every collection into result.
auto& ref = result.coll_to_references[out - result.docs];
if (a.coll_to_references != nullptr) {
ref.insert(a.coll_to_references[A - a.docs].begin(), a.coll_to_references[A - a.docs].end());
}
if (b.coll_to_references != nullptr) {
ref.insert(b.coll_to_references[B - b.docs].begin(), b.coll_to_references[B - b.docs].end());
}
}
out++;
@ -92,16 +104,8 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
size_t indexA = 0, indexB = 0, res_index = 0, lenA = a.count, lenB = b.count;
result.docs = new uint32_t[lenA + lenB];
// Add an entry of references in the result for each unique collection in a and b.
for (auto const& item: a.reference_filter_results) {
if (result.reference_filter_results.count(item.first) == 0) {
result.reference_filter_results[item.first] = new reference_filter_result_t[lenA + lenB];
}
}
for (auto const& item: b.reference_filter_results) {
if (result.reference_filter_results.count(item.first) == 0) {
result.reference_filter_results[item.first] = new reference_filter_result_t[lenA + lenB];
}
if (a.coll_to_references != nullptr || b.coll_to_references != nullptr) {
result.coll_to_references = new std::map<std::string, reference_filter_result_t>[lenA + lenB] {};
}
while (indexA < lenA && indexB < lenB) {
@ -112,9 +116,10 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
res_index++;
}
// Copy references of the last result document from every collection in a.
for (auto const& item: a.reference_filter_results) {
result.reference_filter_results[item.first][res_index - 1] = item.second[indexA];
if (a.coll_to_references != nullptr) {
// Copy references of the last result document from every collection in a.
auto &ref = result.coll_to_references[res_index - 1];
ref.insert(a.coll_to_references[indexA].begin(), a.coll_to_references[indexA].end());
}
indexA++;
@ -124,8 +129,9 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
res_index++;
}
for (auto const& item: b.reference_filter_results) {
result.reference_filter_results[item.first][res_index - 1] = item.second[indexB];
if (b.coll_to_references != nullptr) {
auto &ref = result.coll_to_references[res_index - 1];
ref.insert(b.coll_to_references[indexB].begin(), b.coll_to_references[indexB].end());
}
indexB++;
@ -138,8 +144,9 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
res_index++;
}
for (auto const& item: a.reference_filter_results) {
result.reference_filter_results[item.first][res_index - 1] = item.second[indexA];
if (a.coll_to_references != nullptr) {
auto &ref = result.coll_to_references[res_index - 1];
ref.insert(a.coll_to_references[indexA].begin(), a.coll_to_references[indexA].end());
}
indexA++;
@ -151,8 +158,9 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
res_index++;
}
for (auto const& item: b.reference_filter_results) {
result.reference_filter_results[item.first][res_index - 1] = item.second[indexB];
if (b.coll_to_references != nullptr) {
auto &ref = result.coll_to_references[res_index - 1];
ref.insert(b.coll_to_references[indexB].begin(), b.coll_to_references[indexB].end());
}
indexB++;
@ -160,21 +168,27 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
result.count = res_index;
if (res_index == lenA + lenB) {
return;
}
// shrink fit
auto out = new uint32_t[res_index];
memcpy(out, result.docs, res_index * sizeof(uint32_t));
delete[] result.docs;
result.docs = out;
for (auto &item: result.reference_filter_results) {
auto out_references = new reference_filter_result_t[res_index];
for (uint32_t i = 0; i < result.count; i++) {
out_references[i] = item.second[i];
}
delete[] item.second;
item.second = out_references;
if (result.coll_to_references == nullptr) {
return;
}
auto out_references = new std::map<std::string, reference_filter_result_t>[res_index] {};
for (uint32_t i = 0; i < res_index; i++) {
auto& ref = out_references[i];
ref.insert(result.coll_to_references[i].begin(), result.coll_to_references[i].end());
}
result.coll_to_references = out_references;
}
void filter_result_iterator_t::and_filter_iterators() {
@ -410,8 +424,9 @@ void filter_result_iterator_t::next() {
seq_id = filter_result.docs[result_index];
reference.clear();
for (auto const& item: filter_result.reference_filter_results) {
reference[item.first] = item.second[result_index];
if (filter_result.coll_to_references != nullptr) {
auto& ref = filter_result.coll_to_references[result_index];
reference.insert(ref.begin(), ref.end());
}
return;
@ -666,8 +681,9 @@ void filter_result_iterator_t::init() {
}
seq_id = filter_result.docs[result_index];
for (auto const& item: filter_result.reference_filter_results) {
reference[item.first] = item.second[result_index];
if (filter_result.coll_to_references != nullptr) {
auto& ref = filter_result.coll_to_references[result_index];
reference.insert(ref.begin(), ref.end());
}
is_filter_result_initialized = true;
@ -1141,8 +1157,9 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
seq_id = filter_result.docs[result_index];
reference.clear();
for (auto const& item: filter_result.reference_filter_results) {
reference[item.first] = item.second[result_index];
if (filter_result.coll_to_references != nullptr) {
auto& ref = filter_result.coll_to_references[result_index];
reference.insert(ref.begin(), ref.end());
}
return;
@ -1374,8 +1391,9 @@ void filter_result_iterator_t::reset() {
seq_id = filter_result.docs[result_index];
reference.clear();
for (auto const& item: filter_result.reference_filter_results) {
reference[item.first] = item.second[result_index];
if (filter_result.coll_to_references != nullptr) {
auto& ref = filter_result.coll_to_references[result_index];
reference.insert(ref.begin(), ref.end());
}
is_valid = true;
@ -1480,7 +1498,7 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
return;
}
if (filter_result.reference_filter_results.empty()) {
if (filter_result.coll_to_references == nullptr) {
if (is_filter_result_initialized) {
result.count = ArrayUtils::and_scalar(A, lenA, filter_result.docs, filter_result.count, &result.docs);
return;
@ -1528,16 +1546,15 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
result.count = match_indexes.size();
result.docs = new uint32_t[match_indexes.size()];
for (auto const& item: filter_result.reference_filter_results) {
result.reference_filter_results[item.first] = new reference_filter_result_t[match_indexes.size()];
}
result.coll_to_references = new std::map<std::string, reference_filter_result_t>[match_indexes.size()] {};
for (uint32_t i = 0; i < match_indexes.size(); i++) {
auto const& match_index = match_indexes[i];
result.docs[i] = filter_result.docs[match_index];
for (auto const& item: filter_result.reference_filter_results) {
result.reference_filter_results[item.first][i] = item.second[match_index];
}
auto& result_reference = result.coll_to_references[i];
result_reference.insert(filter_result.coll_to_references[match_index].begin(),
filter_result.coll_to_references[match_index].end());
}
}
@ -1618,22 +1635,28 @@ filter_result_iterator_t &filter_result_iterator_t::operator=(filter_result_iter
return *this;
}
void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t& result) {
void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t*& result) {
if (!is_filter_result_initialized) {
return;
}
auto result_length = result.count = std::min(n, filter_result.count - result_index);
result.docs = new uint32_t[result_length];
for (const auto &item: filter_result.reference_filter_results) {
result.reference_filter_results[item.first] = new reference_filter_result_t[result_length];
auto result_length = result->count = std::min(n, filter_result.count - result_index);
result->docs = new uint32_t[result_length];
if (filter_result.coll_to_references != nullptr) {
result->coll_to_references = new std::map<std::string, reference_filter_result_t>[result_length] {};
}
for (uint32_t i = 0; i < result_length; i++, result_index++) {
result.docs[i] = filter_result.docs[result_index];
for (const auto &item: filter_result.reference_filter_results) {
result.reference_filter_results[item.first][i] = item.second[result_index];
result->docs[i] = filter_result.docs[result_index];
if (filter_result.coll_to_references == nullptr) {
continue;
}
auto& result_reference = result->coll_to_references[i];
// Moving references since get_n_ids is only called in wildcard search flow and filter_result_iterator is
// not used afterwards.
result_reference = std::move(filter_result.coll_to_references[result_index]);
}
is_valid = result_index < filter_result.count;
@ -1642,7 +1665,7 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t& res
void filter_result_iterator_t::get_n_ids(const uint32_t& n,
uint32_t& excluded_result_index,
uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size,
filter_result_t& result) {
filter_result_t*& result) {
if (excluded_result_ids == nullptr || excluded_result_ids_size == 0 ||
excluded_result_index >= excluded_result_ids_size) {
return get_n_ids(n, result);
@ -1663,18 +1686,24 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n,
}
}
result.count = match_indexes.size();
result.docs = new uint32_t[match_indexes.size()];
for (auto const& item: filter_result.reference_filter_results) {
result.reference_filter_results[item.first] = new reference_filter_result_t[match_indexes.size()];
result->count = match_indexes.size();
result->docs = new uint32_t[match_indexes.size()];
if (filter_result.coll_to_references != nullptr) {
result->coll_to_references = new std::map<std::string, reference_filter_result_t>[match_indexes.size()] {};
}
for (uint32_t i = 0; i < match_indexes.size(); i++) {
auto const& match_index = match_indexes[i];
result.docs[i] = filter_result.docs[match_index];
for (auto const& item: filter_result.reference_filter_results) {
result.reference_filter_results[item.first][i] = item.second[match_index];
result->docs[i] = filter_result.docs[match_index];
if (filter_result.coll_to_references == nullptr) {
continue;
}
auto& result_reference = result->coll_to_references[i];
// Moving references since get_n_ids is only called in wildcard search flow and filter_result_iterator is
// not used afterwards.
result_reference = std::move(filter_result.coll_to_references[match_index]);
}
is_valid = result_index < filter_result.count;

View File

@ -1,5 +1,6 @@
#include "index.h"
#include <memory>
#include <numeric>
#include <chrono>
#include <set>
@ -1697,29 +1698,61 @@ Option<bool> Index::do_reference_filtering_with_lock(filter_node_t* const filter
uint32_t count = filter_result_iterator.to_filter_id_array(reference_docs);
std::unique_ptr<uint32_t[]> docs_guard(reference_docs);
// doc id -> reference doc ids
std::map<uint32_t, std::vector<uint32_t>> reference_map;
for (uint32_t i = 0; i < count; i++) {
auto reference_doc_id = reference_docs[i];
auto doc_id = sort_index.at(reference_helper_field_name)->at(reference_doc_id);
reference_map[doc_id].push_back(reference_doc_id);
if (count == 0) {
return Option(true);
}
filter_result.count = reference_map.size();
filter_result.docs = new uint32_t[reference_map.size()];
filter_result.reference_filter_results[collection_name] = new reference_filter_result_t[reference_map.size()];
// Collect all the doc ids from the reference ids.
std::vector<std::pair<uint32_t, uint32_t>> id_pairs;
std::unordered_set<uint32_t> unique_doc_ids;
auto const& ref_index = *sort_index.at(reference_helper_field_name);
for (uint32_t i = 0; i < count; i++) {
auto& reference_doc_id = reference_docs[i];
auto doc_id = ref_index.at(reference_doc_id);
size_t doc_index = 0;
for (auto &item: reference_map) {
filter_result.docs[doc_index] = item.first;
id_pairs.emplace_back(std::pair(doc_id, reference_doc_id));
unique_doc_ids.insert(doc_id);
}
auto& reference_result = filter_result.reference_filter_results[collection_name][doc_index];
reference_result.count = item.second.size();
reference_result.docs = new uint32_t[item.second.size()];
std::copy(item.second.begin(), item.second.end(), reference_result.docs);
std::sort(id_pairs.begin(), id_pairs.end(), [](auto const& left, auto const& right) {
return left.first < right.first;
});
doc_index++;
filter_result.count = unique_doc_ids.size();
filter_result.docs = new uint32_t[unique_doc_ids.size()];
filter_result.coll_to_references = new std::map<std::string, reference_filter_result_t>[unique_doc_ids.size()] {};
std::vector<uint32_t> previous_doc_references;
for (uint32_t i = 0, previous_doc = id_pairs[0].first + 1, result_index = 0; i < id_pairs.size(); i++) {
auto const& current_doc = id_pairs[i].first;
auto const& reference_doc_id = id_pairs[i].second;
if (current_doc != previous_doc) {
filter_result.docs[result_index] = current_doc;
if (result_index > 0) {
auto& reference_result = filter_result.coll_to_references[result_index - 1];
auto r = reference_filter_result_t(previous_doc_references.size(), new uint32_t[previous_doc_references.size()]);
std::copy(previous_doc_references.begin(), previous_doc_references.end(), r.docs);
reference_result[collection_name] = std::move(r);
previous_doc_references.clear();
}
result_index++;
previous_doc = current_doc;
previous_doc_references.push_back(reference_doc_id);
} else {
previous_doc_references.push_back(reference_doc_id);
}
}
if (!previous_doc_references.empty()) {
auto& reference_result = filter_result.coll_to_references[filter_result.count - 1];
auto r = reference_filter_result_t(previous_doc_references.size(), new uint32_t[previous_doc_references.size()]);
std::copy(previous_doc_references.begin(), previous_doc_references.end(), r.docs);
reference_result[collection_name] = std::move(r);
}
return Option(true);
@ -2287,6 +2320,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
filter_iterator_guard.reset(filter_result_iterator);
if (!do_phrase_search_op.ok()) {
delete [] all_result_ids;
return do_phrase_search_op;
}
@ -4692,9 +4726,9 @@ Option<bool> Index::do_infix_search(const size_t num_search_fields, const std::v
for(size_t i = 0; i < raw_infix_ids_length; i++) {
auto seq_id = raw_infix_ids[i];
std::map<basic_string<char>, reference_filter_result_t> references;
for (const auto& item: filtered_infix_ids.reference_filter_results) {
references[item.first] = item.second[i];
std::map<std::string, reference_filter_result_t> references;
if (filtered_infix_ids.coll_to_references != nullptr) {
references = std::move(filtered_infix_ids.coll_to_references[i]);
}
int64_t match_score = 0;
@ -5049,9 +5083,9 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
Option<bool>* compute_sort_score_statuses[num_threads];
for(size_t thread_id = 0; thread_id < num_threads && filter_result_iterator->is_valid; thread_id++) {
filter_result_t batch_result;
auto batch_result = new filter_result_t();
filter_result_iterator->get_n_ids(window_size, excluded_result_index, exclude_token_ids,
exclude_token_ids_size, batch_result);
exclude_token_ids_size, batch_result);
num_queued++;
@ -5067,6 +5101,7 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
check_for_circuit_break,
batch_result,
&num_processed, &m_process, &cv_process, &compute_sort_score_status, collection_name]() {
std::unique_ptr<filter_result_t> batch_result_guard(batch_result);
search_begin_us = parent_search_begin;
search_stop_us = parent_search_stop_ms;
@ -5074,11 +5109,11 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
size_t filter_index = 0;
for(size_t i = 0; i < batch_result.count; i++) {
const uint32_t seq_id = batch_result.docs[i];
for(size_t i = 0; i < batch_result->count; i++) {
const uint32_t seq_id = batch_result->docs[i];
std::map<basic_string<char>, reference_filter_result_t> references;
for (const auto& item: batch_result.reference_filter_results) {
references[item.first] = item.second[i];
if (batch_result->coll_to_references != nullptr) {
references = std::move(batch_result->coll_to_references[i]);
}
int64_t match_score = 0;

View File

@ -651,7 +651,7 @@ TEST_F(CollectionJoinTest, AndFilterResults_NoReference) {
filter_result_t::and_filter_results(a, b, result);
ASSERT_EQ(2, result.count);
ASSERT_EQ(0, result.reference_filter_results.size());
ASSERT_EQ(nullptr, result.coll_to_references);
std::vector<uint32_t> docs = {3, 6};
@ -664,30 +664,31 @@ TEST_F(CollectionJoinTest, AndFilterResults_WithReferences) {
filter_result_t a;
a.count = 9;
a.docs = new uint32_t[a.count];
a.reference_filter_results["foo"] = new reference_filter_result_t[a.count];
a.coll_to_references = new std::map<std::string, reference_filter_result_t>[a.count] {};
for (size_t i = 0; i < a.count; i++) {
a.docs[i] = i;
auto& reference = a.coll_to_references[i];
// Having only one reference of each document for brevity.
auto& reference = a.reference_filter_results["foo"][i];
reference.count = 1;
reference.docs = new uint32_t[1];
reference.docs[0] = 10 - i;
auto reference_docs = new uint32_t[1];
reference_docs[0] = 10 - i;
reference["foo"] = reference_filter_result_t(1, reference_docs);
}
filter_result_t b;
b.count = 0;
uint32_t limit = 10;
b.docs = new uint32_t[limit];
b.reference_filter_results["bar"] = new reference_filter_result_t[limit];
b.coll_to_references = new std::map<std::string, reference_filter_result_t>[limit] {};
for (size_t i = 2; i < limit; i++) {
if (i % 3 == 0) {
b.docs[b.count] = i;
auto& reference = b.reference_filter_results["bar"][b.count++];
reference.count = 1;
reference.docs = new uint32_t[1];
reference.docs[0] = 2 * i;
auto& reference = b.coll_to_references[b.count++];
auto reference_docs = new uint32_t[1];
reference_docs[0] = 2 * i;
reference["bar"] = reference_filter_result_t(1, reference_docs);
}
}
@ -696,9 +697,9 @@ TEST_F(CollectionJoinTest, AndFilterResults_WithReferences) {
filter_result_t::and_filter_results(a, b, result);
ASSERT_EQ(2, result.count);
ASSERT_EQ(2, result.reference_filter_results.size());
ASSERT_EQ(1, result.reference_filter_results.count("foo"));
ASSERT_EQ(1, result.reference_filter_results.count("bar"));
ASSERT_EQ(2, result.coll_to_references[0].size());
ASSERT_EQ(1, result.coll_to_references[0].count("foo"));
ASSERT_EQ(1, result.coll_to_references[0].count("bar"));
std::vector<uint32_t> docs = {3, 6}, foo_reference = {7, 4}, bar_reference = {6, 12};
@ -706,10 +707,10 @@ TEST_F(CollectionJoinTest, AndFilterResults_WithReferences) {
ASSERT_EQ(docs[i], result.docs[i]);
// result should contain correct references to the foo and bar collection.
ASSERT_EQ(1, result.reference_filter_results["foo"][i].count);
ASSERT_EQ(foo_reference[i], result.reference_filter_results["foo"][i].docs[0]);
ASSERT_EQ(1, result.reference_filter_results["bar"][i].count);
ASSERT_EQ(bar_reference[i], result.reference_filter_results["bar"][i].docs[0]);
ASSERT_EQ(1, result.coll_to_references[i].at("foo").count);
ASSERT_EQ(foo_reference[i], result.coll_to_references[i].at("foo").docs[0]);
ASSERT_EQ(1, result.coll_to_references[i].at("bar").count);
ASSERT_EQ(bar_reference[i], result.coll_to_references[i].at("bar").docs[0]);
}
}
@ -728,7 +729,7 @@ TEST_F(CollectionJoinTest, OrFilterResults_NoReference) {
filter_result_t result1;
filter_result_t::or_filter_results(a, b, result1);
ASSERT_EQ(3, result1.count);
ASSERT_EQ(0, result1.reference_filter_results.size());
ASSERT_EQ(nullptr, result1.coll_to_references);
std::vector<uint32_t> expected = {3, 6, 9};
for (size_t i = 0; i < result1.count; i++) {
@ -745,7 +746,7 @@ TEST_F(CollectionJoinTest, OrFilterResults_NoReference) {
filter_result_t result2;
filter_result_t::or_filter_results(a, b, result2);
ASSERT_EQ(10, result2.count);
ASSERT_EQ(0, result2.reference_filter_results.size());
ASSERT_EQ(nullptr, result2.coll_to_references);
expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
for (size_t i = 0; i < result2.count; i++) {
@ -765,7 +766,7 @@ TEST_F(CollectionJoinTest, OrFilterResults_NoReference) {
// b.docs: [0..8], c.docs: [0, 4, 5]
filter_result_t::or_filter_results(b, c, result3);
ASSERT_EQ(9, result3.count);
ASSERT_EQ(0, result3.reference_filter_results.size());
ASSERT_EQ(nullptr, result3.coll_to_references);
expected = {0, 1, 2, 3, 4, 5, 6, 7, 8};
for(size_t i = 0; i < result3.count; i++) {
@ -779,15 +780,15 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) {
a.count = 0;
a.docs = new uint32_t[limit];
a.reference_filter_results["foo"] = new reference_filter_result_t[limit];
a.coll_to_references = new std::map<std::string, reference_filter_result_t>[limit] {};
for (size_t i = 2; i < limit; i++) {
if (i % 3 == 0) {
a.docs[a.count] = i;
auto& reference = a.reference_filter_results["foo"][a.count++];
reference.count = 1;
reference.docs = new uint32_t[1];
reference.docs[0] = 2 * i;
auto& reference = a.coll_to_references[a.count++];
auto reference_docs = new uint32_t[1];
reference_docs[0] = 2 * i;
reference["foo"] = reference_filter_result_t(1, reference_docs);
}
}
@ -796,27 +797,27 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) {
filter_result_t::or_filter_results(a, b, result1);
ASSERT_EQ(3, result1.count);
ASSERT_EQ(1, result1.reference_filter_results.size());
ASSERT_EQ(1, result1.reference_filter_results.count("foo"));
ASSERT_EQ(1, result1.coll_to_references[0].size());
ASSERT_EQ(1, result1.coll_to_references[0].count("foo"));
std::vector<uint32_t> expected = {3, 6, 9}, foo_reference = {6, 12, 18};
for (size_t i = 0; i < result1.count; i++) {
ASSERT_EQ(expected[i], result1.docs[i]);
ASSERT_EQ(1, result1.reference_filter_results["foo"][i].count);
ASSERT_EQ(foo_reference[i], result1.reference_filter_results["foo"][i].docs[0]);
ASSERT_EQ(1, result1.coll_to_references[i].at("foo").count);
ASSERT_EQ(foo_reference[i], result1.coll_to_references[i].at("foo").docs[0]);
}
b.count = 9;
b.docs = new uint32_t[b.count];
b.reference_filter_results["bar"] = new reference_filter_result_t[b.count];
b.coll_to_references = new std::map<std::string, reference_filter_result_t>[b.count] {};
for (size_t i = 0; i < b.count; i++) {
b.docs[i] = i;
auto& reference = b.reference_filter_results["bar"][i];
reference.count = 1;
reference.docs = new uint32_t[1];
reference.docs[0] = 10 - i;
auto& reference = b.coll_to_references[i];
auto reference_docs = new uint32_t[1];
reference_docs[0] = 10 - i;
reference["bar"] = reference_filter_result_t(1, reference_docs);
}
// a.docs: [3, 6, 9], b.docs: [0..8]
@ -833,18 +834,18 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) {
ASSERT_EQ(expected[i], result2.docs[i]);
if (foo_map.count(i) != 0) {
ASSERT_EQ(1, result2.reference_filter_results["foo"][i].count);
ASSERT_EQ(foo_map[i], result2.reference_filter_results["foo"][i].docs[0]);
ASSERT_EQ(1, result2.coll_to_references[i].at("foo").count);
ASSERT_EQ(foo_map[i], result2.coll_to_references[i].at("foo").docs[0]);
} else {
// Reference count should be 0 for the docs that were not present in the a result.
ASSERT_EQ(0, result2.reference_filter_results["foo"][i].count);
// foo didn't have any reference to current doc.
ASSERT_EQ(0, result2.coll_to_references[i].count("foo"));
}
if (bar_map.count(i) != 0) {
ASSERT_EQ(1, result2.reference_filter_results["bar"][i].count);
ASSERT_EQ(bar_map[i], result2.reference_filter_results["bar"][i].docs[0]);
ASSERT_EQ(1, result2.coll_to_references[i].at("bar").count);
ASSERT_EQ(bar_map[i], result2.coll_to_references[i].at("bar").docs[0]);
} else {
ASSERT_EQ(0, result2.reference_filter_results["bar"][i].count);
ASSERT_EQ(0, result2.coll_to_references[i].count("bar"));
}
}
@ -853,15 +854,15 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) {
std::map<uint32_t, uint32_t> baz_map = {{0, 2}, {4, 0}, {5, 8}};
c.count = baz_map.size();
c.docs = new uint32_t[baz_map.size()];
c.reference_filter_results["baz"] = new reference_filter_result_t[baz_map.size()];
c.coll_to_references = new std::map<std::string, reference_filter_result_t>[baz_map.size()] {};
auto j = 0;
for(auto i: baz_map) {
c.docs[j] = i.first;
auto& reference = c.reference_filter_results["baz"][j++];
reference.count = 1;
reference.docs = new uint32_t[1];
reference.docs[0] = i.second;
auto& reference = c.coll_to_references[j++];
auto reference_docs = new uint32_t[1];
reference_docs[0] = i.second;
reference["baz"] = reference_filter_result_t(1, reference_docs);
}
// b.docs: [0..8], c.docs: [0, 4, 5]
@ -873,17 +874,17 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) {
ASSERT_EQ(expected[i], result3.docs[i]);
if (bar_map.count(i) != 0) {
ASSERT_EQ(1, result3.reference_filter_results["bar"][i].count);
ASSERT_EQ(bar_map[i], result3.reference_filter_results["bar"][i].docs[0]);
ASSERT_EQ(1, result3.coll_to_references[i].at("bar").count);
ASSERT_EQ(bar_map[i], result3.coll_to_references[i].at("bar").docs[0]);
} else {
ASSERT_EQ(0, result3.reference_filter_results["bar"][i].count);
ASSERT_EQ(0, result3.coll_to_references[i].count("bar"));
}
if (baz_map.count(i) != 0) {
ASSERT_EQ(1, result3.reference_filter_results["baz"][i].count);
ASSERT_EQ(baz_map[i], result3.reference_filter_results["baz"][i].docs[0]);
ASSERT_EQ(1, result3.coll_to_references[i].at("baz").count);
ASSERT_EQ(baz_map[i], result3.coll_to_references[i].at("baz").docs[0]);
} else {
ASSERT_EQ(0, result3.reference_filter_results["baz"][i].count);
ASSERT_EQ(0, result3.coll_to_references[i].count("baz"));
}
}
}