mirror of
https://github.com/typesense/typesense.git
synced 2025-05-20 21:52:23 +08:00
Merge pull request #1235 from happy-san/v0.26-facets
Refactor `filter_result_t::reference_filter_results`.
This commit is contained in:
commit
b69e2a2687
@ -14,13 +14,39 @@ struct reference_filter_result_t {
|
||||
uint32_t count = 0;
|
||||
uint32_t* docs = nullptr;
|
||||
|
||||
reference_filter_result_t& operator=(const reference_filter_result_t& obj) noexcept {
|
||||
if (&obj == this)
|
||||
return *this;
|
||||
/// Builds a result from a reference-document count and an id buffer; both default to the empty state (0 / nullptr).
/// The pointer is stored as-is, no copy of the buffer is made here.
/// NOTE(review): the object presumably takes ownership of `docs` — confirm against the destructor.
explicit reference_filter_result_t(uint32_t count = 0, uint32_t* docs = nullptr) : count(count), docs(docs) {}
|
||||
|
||||
reference_filter_result_t(const reference_filter_result_t& obj) {
|
||||
if (&obj == this) {
|
||||
return;
|
||||
}
|
||||
|
||||
count = obj.count;
|
||||
docs = new uint32_t[count];
|
||||
memcpy(docs, obj.docs, count * sizeof(uint32_t));
|
||||
}
|
||||
|
||||
/// Deep-copying assignment: replaces this object's ids with a private copy of obj's ids.
///
/// @param obj Result to copy from. An empty source (count == 0 or docs == nullptr) leaves this object empty.
/// @return Reference to this object.
reference_filter_result_t& operator=(const reference_filter_result_t& obj) noexcept {
    if (&obj == this) {
        return *this;
    }

    // Build the replacement first so the current buffer is released only once the copy exists.
    uint32_t copied_count = 0;
    uint32_t* copied_docs = nullptr;
    if (obj.count > 0 && obj.docs != nullptr) {
        copied_count = obj.count;
        copied_docs = new uint32_t[copied_count];
        memcpy(copied_docs, obj.docs, copied_count * sizeof(uint32_t));
    }

    // Release the buffer held so far; overwriting the pointer without this would leak it.
    // NOTE(review): assumes `docs` is either null or a new[] allocation owned by this object — confirm against
    // the destructor.
    delete[] docs;

    count = copied_count;
    docs = copied_docs;

    return *this;
}
|
||||
|
||||
/// Move assignment: takes over obj's id buffer without copying it and leaves obj empty.
///
/// @param obj Result whose buffer is taken; afterwards it holds count == 0 and docs == nullptr.
/// @return Reference to this object.
reference_filter_result_t& operator=(reference_filter_result_t&& obj) noexcept {
    if (&obj == this) {
        return *this;
    }

    // Release the buffer held so far; overwriting the pointer without this would leak it.
    // NOTE(review): assumes `docs` is either null or a new[] allocation owned by this object — confirm against
    // the destructor.
    delete[] docs;

    count = obj.count;
    docs = obj.docs;

    // Leave the source in a consistent empty state so a later copy of it never reads through a null buffer.
    obj.count = 0;
    obj.docs = nullptr;

    return *this;
}
|
||||
@ -41,8 +67,9 @@ struct single_filter_result_t {
|
||||
seq_id(seq_id), reference_filter_results(std::move(reference_filter_results)) {}
|
||||
|
||||
single_filter_result_t(const single_filter_result_t& obj) {
|
||||
if (&obj == this)
|
||||
if (&obj == this) {
|
||||
return;
|
||||
}
|
||||
|
||||
seq_id = obj.seq_id;
|
||||
|
||||
@ -58,74 +85,63 @@ struct filter_result_t {
|
||||
uint32_t count = 0;
|
||||
uint32_t* docs = nullptr;
|
||||
// Collection name -> Reference filter result
|
||||
std::map<std::string, reference_filter_result_t*> reference_filter_results;
|
||||
std::map<std::string, reference_filter_result_t>* coll_to_references = nullptr;
|
||||
|
||||
filter_result_t() = default;
|
||||
|
||||
filter_result_t(uint32_t count, uint32_t* docs) : count(count), docs(docs) {}
|
||||
|
||||
filter_result_t(const filter_result_t& obj) {
|
||||
if (&obj == this)
|
||||
if (&obj == this) {
|
||||
return;
|
||||
}
|
||||
|
||||
count = obj.count;
|
||||
docs = new uint32_t[count];
|
||||
memcpy(docs, obj.docs, count * sizeof(uint32_t));
|
||||
|
||||
// Copy every collection's references.
|
||||
for (const auto &item: obj.reference_filter_results) {
|
||||
auto& ref_coll_name = item.first;
|
||||
reference_filter_results[ref_coll_name] = new reference_filter_result_t[count];
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
reference_filter_results[ref_coll_name][i] = item.second[i];
|
||||
}
|
||||
}
|
||||
copy_references(obj, *this);
|
||||
}
|
||||
|
||||
filter_result_t& operator=(const filter_result_t& obj) noexcept {
|
||||
if (&obj == this)
|
||||
if (&obj == this) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
count = obj.count;
|
||||
docs = new uint32_t[count];
|
||||
memcpy(docs, obj.docs, count * sizeof(uint32_t));
|
||||
|
||||
// Copy every collection's references.
|
||||
for (const auto &item: obj.reference_filter_results) {
|
||||
reference_filter_results[item.first] = new reference_filter_result_t[count];
|
||||
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
reference_filter_results[item.first][i] = item.second[i];
|
||||
}
|
||||
}
|
||||
copy_references(obj, *this);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
filter_result_t& operator=(filter_result_t&& obj) noexcept {
|
||||
if (&obj == this)
|
||||
if (&obj == this) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
count = obj.count;
|
||||
docs = obj.docs;
|
||||
reference_filter_results = std::map(obj.reference_filter_results);
|
||||
coll_to_references = obj.coll_to_references;
|
||||
|
||||
obj.docs = nullptr;
|
||||
obj.reference_filter_results.clear();
|
||||
obj.coll_to_references = nullptr;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
~filter_result_t() {
|
||||
delete[] docs;
|
||||
for (const auto &item: reference_filter_results) {
|
||||
delete[] item.second;
|
||||
}
|
||||
delete[] coll_to_references;
|
||||
}
|
||||
|
||||
static void and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result);
|
||||
|
||||
static void or_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result);
|
||||
|
||||
static void copy_references(const filter_result_t& from, filter_result_t& to);
|
||||
};
|
||||
|
||||
class filter_result_iterator_t {
|
||||
@ -175,7 +191,8 @@ private:
|
||||
explicit filter_result_iterator_t(uint32_t approx_filter_ids_length);
|
||||
|
||||
/// Collects n doc ids while advancing the iterator. The iterator may become invalid during this operation.
|
||||
void get_n_ids(const uint32_t& n, filter_result_t& result);
|
||||
/// **The references are moved from filter_result_iterator_t.
|
||||
void get_n_ids(const uint32_t& n, filter_result_t*& result);
|
||||
|
||||
public:
|
||||
uint32_t seq_id = 0;
|
||||
@ -221,11 +238,11 @@ public:
|
||||
void next();
|
||||
|
||||
/// Collects n doc ids while advancing the iterator. The ids present in excluded_result_ids are ignored. The
|
||||
/// iterator may become invalid during this operation.
|
||||
/// iterator may become invalid during this operation. **The references are moved from filter_result_iterator_t.
|
||||
void get_n_ids(const uint32_t& n,
|
||||
uint32_t& excluded_result_index,
|
||||
uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size,
|
||||
filter_result_t& result);
|
||||
filter_result_t*& result);
|
||||
|
||||
/// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during
|
||||
/// this operation.
|
||||
|
@ -13,6 +13,23 @@
|
||||
#include "posting.h"
|
||||
#include "collection_manager.h"
|
||||
|
||||
void filter_result_t::copy_references(const filter_result_t& from, filter_result_t& to) {
|
||||
if (from.coll_to_references == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto const& count = from.count;
|
||||
to.coll_to_references = new std::map<std::string, reference_filter_result_t>[count] {};
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
if (from.coll_to_references[i].empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto& ref = to.coll_to_references[i];
|
||||
ref.insert(from.coll_to_references[i].begin(), from.coll_to_references[i].end());
|
||||
}
|
||||
}
|
||||
|
||||
void filter_result_t::and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) {
|
||||
auto lenA = a.count, lenB = b.count;
|
||||
if (lenA == 0 || lenB == 0) {
|
||||
@ -25,16 +42,8 @@ void filter_result_t::and_filter_results(const filter_result_t& a, const filter_
|
||||
const uint32_t *endA = A + lenA;
|
||||
const uint32_t *endB = B + lenB;
|
||||
|
||||
// Add an entry of references in the result for each unique collection in a and b.
|
||||
for (auto const& item: a.reference_filter_results) {
|
||||
if (result.reference_filter_results.count(item.first) == 0) {
|
||||
result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
|
||||
}
|
||||
}
|
||||
for (auto const& item: b.reference_filter_results) {
|
||||
if (result.reference_filter_results.count(item.first) == 0) {
|
||||
result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
|
||||
}
|
||||
if (a.coll_to_references != nullptr || b.coll_to_references != nullptr) {
|
||||
result.coll_to_references = new std::map<std::string, reference_filter_result_t>[std::min(lenA, lenB)] {};
|
||||
}
|
||||
|
||||
while (true) {
|
||||
@ -54,12 +63,15 @@ void filter_result_t::and_filter_results(const filter_result_t& a, const filter_
|
||||
if (*A == *B) {
|
||||
*out = *A;
|
||||
|
||||
// Copy the references of the document from every collection into result.
|
||||
for (auto const& item: a.reference_filter_results) {
|
||||
result.reference_filter_results[item.first][out - result.docs] = item.second[A - a.docs];
|
||||
}
|
||||
for (auto const& item: b.reference_filter_results) {
|
||||
result.reference_filter_results[item.first][out - result.docs] = item.second[B - b.docs];
|
||||
if (result.coll_to_references != nullptr) {
|
||||
// Copy the references of the document from every collection into result.
|
||||
auto& ref = result.coll_to_references[out - result.docs];
|
||||
if (a.coll_to_references != nullptr) {
|
||||
ref.insert(a.coll_to_references[A - a.docs].begin(), a.coll_to_references[A - a.docs].end());
|
||||
}
|
||||
if (b.coll_to_references != nullptr) {
|
||||
ref.insert(b.coll_to_references[B - b.docs].begin(), b.coll_to_references[B - b.docs].end());
|
||||
}
|
||||
}
|
||||
|
||||
out++;
|
||||
@ -92,16 +104,8 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
|
||||
size_t indexA = 0, indexB = 0, res_index = 0, lenA = a.count, lenB = b.count;
|
||||
result.docs = new uint32_t[lenA + lenB];
|
||||
|
||||
// Add an entry of references in the result for each unique collection in a and b.
|
||||
for (auto const& item: a.reference_filter_results) {
|
||||
if (result.reference_filter_results.count(item.first) == 0) {
|
||||
result.reference_filter_results[item.first] = new reference_filter_result_t[lenA + lenB];
|
||||
}
|
||||
}
|
||||
for (auto const& item: b.reference_filter_results) {
|
||||
if (result.reference_filter_results.count(item.first) == 0) {
|
||||
result.reference_filter_results[item.first] = new reference_filter_result_t[lenA + lenB];
|
||||
}
|
||||
if (a.coll_to_references != nullptr || b.coll_to_references != nullptr) {
|
||||
result.coll_to_references = new std::map<std::string, reference_filter_result_t>[lenA + lenB] {};
|
||||
}
|
||||
|
||||
while (indexA < lenA && indexB < lenB) {
|
||||
@ -112,9 +116,10 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
|
||||
res_index++;
|
||||
}
|
||||
|
||||
// Copy references of the last result document from every collection in a.
|
||||
for (auto const& item: a.reference_filter_results) {
|
||||
result.reference_filter_results[item.first][res_index - 1] = item.second[indexA];
|
||||
if (a.coll_to_references != nullptr) {
|
||||
// Copy references of the last result document from every collection in a.
|
||||
auto &ref = result.coll_to_references[res_index - 1];
|
||||
ref.insert(a.coll_to_references[indexA].begin(), a.coll_to_references[indexA].end());
|
||||
}
|
||||
|
||||
indexA++;
|
||||
@ -124,8 +129,9 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
|
||||
res_index++;
|
||||
}
|
||||
|
||||
for (auto const& item: b.reference_filter_results) {
|
||||
result.reference_filter_results[item.first][res_index - 1] = item.second[indexB];
|
||||
if (b.coll_to_references != nullptr) {
|
||||
auto &ref = result.coll_to_references[res_index - 1];
|
||||
ref.insert(b.coll_to_references[indexB].begin(), b.coll_to_references[indexB].end());
|
||||
}
|
||||
|
||||
indexB++;
|
||||
@ -138,8 +144,9 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
|
||||
res_index++;
|
||||
}
|
||||
|
||||
for (auto const& item: a.reference_filter_results) {
|
||||
result.reference_filter_results[item.first][res_index - 1] = item.second[indexA];
|
||||
if (a.coll_to_references != nullptr) {
|
||||
auto &ref = result.coll_to_references[res_index - 1];
|
||||
ref.insert(a.coll_to_references[indexA].begin(), a.coll_to_references[indexA].end());
|
||||
}
|
||||
|
||||
indexA++;
|
||||
@ -151,8 +158,9 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
|
||||
res_index++;
|
||||
}
|
||||
|
||||
for (auto const& item: b.reference_filter_results) {
|
||||
result.reference_filter_results[item.first][res_index - 1] = item.second[indexB];
|
||||
if (b.coll_to_references != nullptr) {
|
||||
auto &ref = result.coll_to_references[res_index - 1];
|
||||
ref.insert(b.coll_to_references[indexB].begin(), b.coll_to_references[indexB].end());
|
||||
}
|
||||
|
||||
indexB++;
|
||||
@ -160,21 +168,27 @@ void filter_result_t::or_filter_results(const filter_result_t& a, const filter_r
|
||||
|
||||
result.count = res_index;
|
||||
|
||||
if (res_index == lenA + lenB) {
|
||||
return;
|
||||
}
|
||||
|
||||
// shrink fit
|
||||
auto out = new uint32_t[res_index];
|
||||
memcpy(out, result.docs, res_index * sizeof(uint32_t));
|
||||
delete[] result.docs;
|
||||
result.docs = out;
|
||||
|
||||
for (auto &item: result.reference_filter_results) {
|
||||
auto out_references = new reference_filter_result_t[res_index];
|
||||
|
||||
for (uint32_t i = 0; i < result.count; i++) {
|
||||
out_references[i] = item.second[i];
|
||||
}
|
||||
delete[] item.second;
|
||||
item.second = out_references;
|
||||
if (result.coll_to_references == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto out_references = new std::map<std::string, reference_filter_result_t>[res_index] {};
|
||||
for (uint32_t i = 0; i < res_index; i++) {
|
||||
auto& ref = out_references[i];
|
||||
ref.insert(result.coll_to_references[i].begin(), result.coll_to_references[i].end());
|
||||
}
|
||||
|
||||
result.coll_to_references = out_references;
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::and_filter_iterators() {
|
||||
@ -410,8 +424,9 @@ void filter_result_iterator_t::next() {
|
||||
|
||||
seq_id = filter_result.docs[result_index];
|
||||
reference.clear();
|
||||
for (auto const& item: filter_result.reference_filter_results) {
|
||||
reference[item.first] = item.second[result_index];
|
||||
if (filter_result.coll_to_references != nullptr) {
|
||||
auto& ref = filter_result.coll_to_references[result_index];
|
||||
reference.insert(ref.begin(), ref.end());
|
||||
}
|
||||
|
||||
return;
|
||||
@ -666,8 +681,9 @@ void filter_result_iterator_t::init() {
|
||||
}
|
||||
|
||||
seq_id = filter_result.docs[result_index];
|
||||
for (auto const& item: filter_result.reference_filter_results) {
|
||||
reference[item.first] = item.second[result_index];
|
||||
if (filter_result.coll_to_references != nullptr) {
|
||||
auto& ref = filter_result.coll_to_references[result_index];
|
||||
reference.insert(ref.begin(), ref.end());
|
||||
}
|
||||
|
||||
is_filter_result_initialized = true;
|
||||
@ -1141,8 +1157,9 @@ void filter_result_iterator_t::skip_to(uint32_t id) {
|
||||
|
||||
seq_id = filter_result.docs[result_index];
|
||||
reference.clear();
|
||||
for (auto const& item: filter_result.reference_filter_results) {
|
||||
reference[item.first] = item.second[result_index];
|
||||
if (filter_result.coll_to_references != nullptr) {
|
||||
auto& ref = filter_result.coll_to_references[result_index];
|
||||
reference.insert(ref.begin(), ref.end());
|
||||
}
|
||||
|
||||
return;
|
||||
@ -1374,8 +1391,9 @@ void filter_result_iterator_t::reset() {
|
||||
seq_id = filter_result.docs[result_index];
|
||||
|
||||
reference.clear();
|
||||
for (auto const& item: filter_result.reference_filter_results) {
|
||||
reference[item.first] = item.second[result_index];
|
||||
if (filter_result.coll_to_references != nullptr) {
|
||||
auto& ref = filter_result.coll_to_references[result_index];
|
||||
reference.insert(ref.begin(), ref.end());
|
||||
}
|
||||
|
||||
is_valid = true;
|
||||
@ -1480,7 +1498,7 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
|
||||
return;
|
||||
}
|
||||
|
||||
if (filter_result.reference_filter_results.empty()) {
|
||||
if (filter_result.coll_to_references == nullptr) {
|
||||
if (is_filter_result_initialized) {
|
||||
result.count = ArrayUtils::and_scalar(A, lenA, filter_result.docs, filter_result.count, &result.docs);
|
||||
return;
|
||||
@ -1528,16 +1546,15 @@ void filter_result_iterator_t::and_scalar(const uint32_t* A, const uint32_t& len
|
||||
|
||||
result.count = match_indexes.size();
|
||||
result.docs = new uint32_t[match_indexes.size()];
|
||||
for (auto const& item: filter_result.reference_filter_results) {
|
||||
result.reference_filter_results[item.first] = new reference_filter_result_t[match_indexes.size()];
|
||||
}
|
||||
result.coll_to_references = new std::map<std::string, reference_filter_result_t>[match_indexes.size()] {};
|
||||
|
||||
for (uint32_t i = 0; i < match_indexes.size(); i++) {
|
||||
auto const& match_index = match_indexes[i];
|
||||
result.docs[i] = filter_result.docs[match_index];
|
||||
for (auto const& item: filter_result.reference_filter_results) {
|
||||
result.reference_filter_results[item.first][i] = item.second[match_index];
|
||||
}
|
||||
|
||||
auto& result_reference = result.coll_to_references[i];
|
||||
result_reference.insert(filter_result.coll_to_references[match_index].begin(),
|
||||
filter_result.coll_to_references[match_index].end());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1618,22 +1635,28 @@ filter_result_iterator_t &filter_result_iterator_t::operator=(filter_result_iter
|
||||
return *this;
|
||||
}
|
||||
|
||||
void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t& result) {
|
||||
void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t*& result) {
|
||||
if (!is_filter_result_initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto result_length = result.count = std::min(n, filter_result.count - result_index);
|
||||
result.docs = new uint32_t[result_length];
|
||||
for (const auto &item: filter_result.reference_filter_results) {
|
||||
result.reference_filter_results[item.first] = new reference_filter_result_t[result_length];
|
||||
auto result_length = result->count = std::min(n, filter_result.count - result_index);
|
||||
result->docs = new uint32_t[result_length];
|
||||
if (filter_result.coll_to_references != nullptr) {
|
||||
result->coll_to_references = new std::map<std::string, reference_filter_result_t>[result_length] {};
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < result_length; i++, result_index++) {
|
||||
result.docs[i] = filter_result.docs[result_index];
|
||||
for (const auto &item: filter_result.reference_filter_results) {
|
||||
result.reference_filter_results[item.first][i] = item.second[result_index];
|
||||
result->docs[i] = filter_result.docs[result_index];
|
||||
|
||||
if (filter_result.coll_to_references == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto& result_reference = result->coll_to_references[i];
|
||||
// Moving references since get_n_ids is only called in wildcard search flow and filter_result_iterator is
|
||||
// not used afterwards.
|
||||
result_reference = std::move(filter_result.coll_to_references[result_index]);
|
||||
}
|
||||
|
||||
is_valid = result_index < filter_result.count;
|
||||
@ -1642,7 +1665,7 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n, filter_result_t& res
|
||||
void filter_result_iterator_t::get_n_ids(const uint32_t& n,
|
||||
uint32_t& excluded_result_index,
|
||||
uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size,
|
||||
filter_result_t& result) {
|
||||
filter_result_t*& result) {
|
||||
if (excluded_result_ids == nullptr || excluded_result_ids_size == 0 ||
|
||||
excluded_result_index >= excluded_result_ids_size) {
|
||||
return get_n_ids(n, result);
|
||||
@ -1663,18 +1686,24 @@ void filter_result_iterator_t::get_n_ids(const uint32_t& n,
|
||||
}
|
||||
}
|
||||
|
||||
result.count = match_indexes.size();
|
||||
result.docs = new uint32_t[match_indexes.size()];
|
||||
for (auto const& item: filter_result.reference_filter_results) {
|
||||
result.reference_filter_results[item.first] = new reference_filter_result_t[match_indexes.size()];
|
||||
result->count = match_indexes.size();
|
||||
result->docs = new uint32_t[match_indexes.size()];
|
||||
if (filter_result.coll_to_references != nullptr) {
|
||||
result->coll_to_references = new std::map<std::string, reference_filter_result_t>[match_indexes.size()] {};
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < match_indexes.size(); i++) {
|
||||
auto const& match_index = match_indexes[i];
|
||||
result.docs[i] = filter_result.docs[match_index];
|
||||
for (auto const& item: filter_result.reference_filter_results) {
|
||||
result.reference_filter_results[item.first][i] = item.second[match_index];
|
||||
result->docs[i] = filter_result.docs[match_index];
|
||||
|
||||
if (filter_result.coll_to_references == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto& result_reference = result->coll_to_references[i];
|
||||
// Moving references since get_n_ids is only called in wildcard search flow and filter_result_iterator is
|
||||
// not used afterwards.
|
||||
result_reference = std::move(filter_result.coll_to_references[match_index]);
|
||||
}
|
||||
|
||||
is_valid = result_index < filter_result.count;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "index.h"
|
||||
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <chrono>
|
||||
#include <set>
|
||||
@ -1697,29 +1698,61 @@ Option<bool> Index::do_reference_filtering_with_lock(filter_node_t* const filter
|
||||
uint32_t count = filter_result_iterator.to_filter_id_array(reference_docs);
|
||||
std::unique_ptr<uint32_t[]> docs_guard(reference_docs);
|
||||
|
||||
// doc id -> reference doc ids
|
||||
std::map<uint32_t, std::vector<uint32_t>> reference_map;
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
auto reference_doc_id = reference_docs[i];
|
||||
auto doc_id = sort_index.at(reference_helper_field_name)->at(reference_doc_id);
|
||||
|
||||
reference_map[doc_id].push_back(reference_doc_id);
|
||||
if (count == 0) {
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
filter_result.count = reference_map.size();
|
||||
filter_result.docs = new uint32_t[reference_map.size()];
|
||||
filter_result.reference_filter_results[collection_name] = new reference_filter_result_t[reference_map.size()];
|
||||
// Collect all the doc ids from the reference ids.
|
||||
std::vector<std::pair<uint32_t, uint32_t>> id_pairs;
|
||||
std::unordered_set<uint32_t> unique_doc_ids;
|
||||
auto const& ref_index = *sort_index.at(reference_helper_field_name);
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
auto& reference_doc_id = reference_docs[i];
|
||||
auto doc_id = ref_index.at(reference_doc_id);
|
||||
|
||||
size_t doc_index = 0;
|
||||
for (auto &item: reference_map) {
|
||||
filter_result.docs[doc_index] = item.first;
|
||||
id_pairs.emplace_back(std::pair(doc_id, reference_doc_id));
|
||||
unique_doc_ids.insert(doc_id);
|
||||
}
|
||||
|
||||
auto& reference_result = filter_result.reference_filter_results[collection_name][doc_index];
|
||||
reference_result.count = item.second.size();
|
||||
reference_result.docs = new uint32_t[item.second.size()];
|
||||
std::copy(item.second.begin(), item.second.end(), reference_result.docs);
|
||||
std::sort(id_pairs.begin(), id_pairs.end(), [](auto const& left, auto const& right) {
|
||||
return left.first < right.first;
|
||||
});
|
||||
|
||||
doc_index++;
|
||||
filter_result.count = unique_doc_ids.size();
|
||||
filter_result.docs = new uint32_t[unique_doc_ids.size()];
|
||||
filter_result.coll_to_references = new std::map<std::string, reference_filter_result_t>[unique_doc_ids.size()] {};
|
||||
|
||||
std::vector<uint32_t> previous_doc_references;
|
||||
for (uint32_t i = 0, previous_doc = id_pairs[0].first + 1, result_index = 0; i < id_pairs.size(); i++) {
|
||||
auto const& current_doc = id_pairs[i].first;
|
||||
auto const& reference_doc_id = id_pairs[i].second;
|
||||
|
||||
if (current_doc != previous_doc) {
|
||||
filter_result.docs[result_index] = current_doc;
|
||||
if (result_index > 0) {
|
||||
auto& reference_result = filter_result.coll_to_references[result_index - 1];
|
||||
|
||||
auto r = reference_filter_result_t(previous_doc_references.size(), new uint32_t[previous_doc_references.size()]);
|
||||
std::copy(previous_doc_references.begin(), previous_doc_references.end(), r.docs);
|
||||
reference_result[collection_name] = std::move(r);
|
||||
|
||||
previous_doc_references.clear();
|
||||
}
|
||||
|
||||
result_index++;
|
||||
previous_doc = current_doc;
|
||||
previous_doc_references.push_back(reference_doc_id);
|
||||
} else {
|
||||
previous_doc_references.push_back(reference_doc_id);
|
||||
}
|
||||
}
|
||||
|
||||
if (!previous_doc_references.empty()) {
|
||||
auto& reference_result = filter_result.coll_to_references[filter_result.count - 1];
|
||||
|
||||
auto r = reference_filter_result_t(previous_doc_references.size(), new uint32_t[previous_doc_references.size()]);
|
||||
std::copy(previous_doc_references.begin(), previous_doc_references.end(), r.docs);
|
||||
reference_result[collection_name] = std::move(r);
|
||||
}
|
||||
|
||||
return Option(true);
|
||||
@ -2287,6 +2320,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
filter_iterator_guard.reset(filter_result_iterator);
|
||||
|
||||
if (!do_phrase_search_op.ok()) {
|
||||
delete [] all_result_ids;
|
||||
return do_phrase_search_op;
|
||||
}
|
||||
|
||||
@ -4692,9 +4726,9 @@ Option<bool> Index::do_infix_search(const size_t num_search_fields, const std::v
|
||||
|
||||
for(size_t i = 0; i < raw_infix_ids_length; i++) {
|
||||
auto seq_id = raw_infix_ids[i];
|
||||
std::map<basic_string<char>, reference_filter_result_t> references;
|
||||
for (const auto& item: filtered_infix_ids.reference_filter_results) {
|
||||
references[item.first] = item.second[i];
|
||||
std::map<std::string, reference_filter_result_t> references;
|
||||
if (filtered_infix_ids.coll_to_references != nullptr) {
|
||||
references = std::move(filtered_infix_ids.coll_to_references[i]);
|
||||
}
|
||||
|
||||
int64_t match_score = 0;
|
||||
@ -5049,9 +5083,9 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
|
||||
Option<bool>* compute_sort_score_statuses[num_threads];
|
||||
|
||||
for(size_t thread_id = 0; thread_id < num_threads && filter_result_iterator->is_valid; thread_id++) {
|
||||
filter_result_t batch_result;
|
||||
auto batch_result = new filter_result_t();
|
||||
filter_result_iterator->get_n_ids(window_size, excluded_result_index, exclude_token_ids,
|
||||
exclude_token_ids_size, batch_result);
|
||||
exclude_token_ids_size, batch_result);
|
||||
|
||||
num_queued++;
|
||||
|
||||
@ -5067,6 +5101,7 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
|
||||
check_for_circuit_break,
|
||||
batch_result,
|
||||
&num_processed, &m_process, &cv_process, &compute_sort_score_status, collection_name]() {
|
||||
std::unique_ptr<filter_result_t> batch_result_guard(batch_result);
|
||||
|
||||
search_begin_us = parent_search_begin;
|
||||
search_stop_us = parent_search_stop_ms;
|
||||
@ -5074,11 +5109,11 @@ Option<bool> Index::search_wildcard(filter_node_t const* const& filter_tree_root
|
||||
|
||||
size_t filter_index = 0;
|
||||
|
||||
for(size_t i = 0; i < batch_result.count; i++) {
|
||||
const uint32_t seq_id = batch_result.docs[i];
|
||||
for(size_t i = 0; i < batch_result->count; i++) {
|
||||
const uint32_t seq_id = batch_result->docs[i];
|
||||
std::map<basic_string<char>, reference_filter_result_t> references;
|
||||
for (const auto& item: batch_result.reference_filter_results) {
|
||||
references[item.first] = item.second[i];
|
||||
if (batch_result->coll_to_references != nullptr) {
|
||||
references = std::move(batch_result->coll_to_references[i]);
|
||||
}
|
||||
|
||||
int64_t match_score = 0;
|
||||
|
@ -651,7 +651,7 @@ TEST_F(CollectionJoinTest, AndFilterResults_NoReference) {
|
||||
filter_result_t::and_filter_results(a, b, result);
|
||||
|
||||
ASSERT_EQ(2, result.count);
|
||||
ASSERT_EQ(0, result.reference_filter_results.size());
|
||||
ASSERT_EQ(nullptr, result.coll_to_references);
|
||||
|
||||
std::vector<uint32_t> docs = {3, 6};
|
||||
|
||||
@ -664,30 +664,31 @@ TEST_F(CollectionJoinTest, AndFilterResults_WithReferences) {
|
||||
filter_result_t a;
|
||||
a.count = 9;
|
||||
a.docs = new uint32_t[a.count];
|
||||
a.reference_filter_results["foo"] = new reference_filter_result_t[a.count];
|
||||
a.coll_to_references = new std::map<std::string, reference_filter_result_t>[a.count] {};
|
||||
|
||||
for (size_t i = 0; i < a.count; i++) {
|
||||
a.docs[i] = i;
|
||||
|
||||
auto& reference = a.coll_to_references[i];
|
||||
// Having only one reference of each document for brevity.
|
||||
auto& reference = a.reference_filter_results["foo"][i];
|
||||
reference.count = 1;
|
||||
reference.docs = new uint32_t[1];
|
||||
reference.docs[0] = 10 - i;
|
||||
auto reference_docs = new uint32_t[1];
|
||||
reference_docs[0] = 10 - i;
|
||||
reference["foo"] = reference_filter_result_t(1, reference_docs);
|
||||
}
|
||||
|
||||
filter_result_t b;
|
||||
b.count = 0;
|
||||
uint32_t limit = 10;
|
||||
b.docs = new uint32_t[limit];
|
||||
b.reference_filter_results["bar"] = new reference_filter_result_t[limit];
|
||||
b.coll_to_references = new std::map<std::string, reference_filter_result_t>[limit] {};
|
||||
for (size_t i = 2; i < limit; i++) {
|
||||
if (i % 3 == 0) {
|
||||
b.docs[b.count] = i;
|
||||
|
||||
auto& reference = b.reference_filter_results["bar"][b.count++];
|
||||
reference.count = 1;
|
||||
reference.docs = new uint32_t[1];
|
||||
reference.docs[0] = 2 * i;
|
||||
auto& reference = b.coll_to_references[b.count++];
|
||||
auto reference_docs = new uint32_t[1];
|
||||
reference_docs[0] = 2 * i;
|
||||
reference["bar"] = reference_filter_result_t(1, reference_docs);
|
||||
}
|
||||
}
|
||||
|
||||
@ -696,9 +697,9 @@ TEST_F(CollectionJoinTest, AndFilterResults_WithReferences) {
|
||||
filter_result_t::and_filter_results(a, b, result);
|
||||
|
||||
ASSERT_EQ(2, result.count);
|
||||
ASSERT_EQ(2, result.reference_filter_results.size());
|
||||
ASSERT_EQ(1, result.reference_filter_results.count("foo"));
|
||||
ASSERT_EQ(1, result.reference_filter_results.count("bar"));
|
||||
ASSERT_EQ(2, result.coll_to_references[0].size());
|
||||
ASSERT_EQ(1, result.coll_to_references[0].count("foo"));
|
||||
ASSERT_EQ(1, result.coll_to_references[0].count("bar"));
|
||||
|
||||
std::vector<uint32_t> docs = {3, 6}, foo_reference = {7, 4}, bar_reference = {6, 12};
|
||||
|
||||
@ -706,10 +707,10 @@ TEST_F(CollectionJoinTest, AndFilterResults_WithReferences) {
|
||||
ASSERT_EQ(docs[i], result.docs[i]);
|
||||
|
||||
// result should contain correct references to the foo and bar collection.
|
||||
ASSERT_EQ(1, result.reference_filter_results["foo"][i].count);
|
||||
ASSERT_EQ(foo_reference[i], result.reference_filter_results["foo"][i].docs[0]);
|
||||
ASSERT_EQ(1, result.reference_filter_results["bar"][i].count);
|
||||
ASSERT_EQ(bar_reference[i], result.reference_filter_results["bar"][i].docs[0]);
|
||||
ASSERT_EQ(1, result.coll_to_references[i].at("foo").count);
|
||||
ASSERT_EQ(foo_reference[i], result.coll_to_references[i].at("foo").docs[0]);
|
||||
ASSERT_EQ(1, result.coll_to_references[i].at("bar").count);
|
||||
ASSERT_EQ(bar_reference[i], result.coll_to_references[i].at("bar").docs[0]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -728,7 +729,7 @@ TEST_F(CollectionJoinTest, OrFilterResults_NoReference) {
|
||||
filter_result_t result1;
|
||||
filter_result_t::or_filter_results(a, b, result1);
|
||||
ASSERT_EQ(3, result1.count);
|
||||
ASSERT_EQ(0, result1.reference_filter_results.size());
|
||||
ASSERT_EQ(nullptr, result1.coll_to_references);
|
||||
|
||||
std::vector<uint32_t> expected = {3, 6, 9};
|
||||
for (size_t i = 0; i < result1.count; i++) {
|
||||
@ -745,7 +746,7 @@ TEST_F(CollectionJoinTest, OrFilterResults_NoReference) {
|
||||
filter_result_t result2;
|
||||
filter_result_t::or_filter_results(a, b, result2);
|
||||
ASSERT_EQ(10, result2.count);
|
||||
ASSERT_EQ(0, result2.reference_filter_results.size());
|
||||
ASSERT_EQ(nullptr, result2.coll_to_references);
|
||||
|
||||
expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
|
||||
for (size_t i = 0; i < result2.count; i++) {
|
||||
@ -765,7 +766,7 @@ TEST_F(CollectionJoinTest, OrFilterResults_NoReference) {
|
||||
// b.docs: [0..8], c.docs: [0, 4, 5]
|
||||
filter_result_t::or_filter_results(b, c, result3);
|
||||
ASSERT_EQ(9, result3.count);
|
||||
ASSERT_EQ(0, result3.reference_filter_results.size());
|
||||
ASSERT_EQ(nullptr, result3.coll_to_references);
|
||||
|
||||
expected = {0, 1, 2, 3, 4, 5, 6, 7, 8};
|
||||
for(size_t i = 0; i < result3.count; i++) {
|
||||
@ -779,15 +780,15 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) {
|
||||
|
||||
a.count = 0;
|
||||
a.docs = new uint32_t[limit];
|
||||
a.reference_filter_results["foo"] = new reference_filter_result_t[limit];
|
||||
a.coll_to_references = new std::map<std::string, reference_filter_result_t>[limit] {};
|
||||
for (size_t i = 2; i < limit; i++) {
|
||||
if (i % 3 == 0) {
|
||||
a.docs[a.count] = i;
|
||||
|
||||
auto& reference = a.reference_filter_results["foo"][a.count++];
|
||||
reference.count = 1;
|
||||
reference.docs = new uint32_t[1];
|
||||
reference.docs[0] = 2 * i;
|
||||
auto& reference = a.coll_to_references[a.count++];
|
||||
auto reference_docs = new uint32_t[1];
|
||||
reference_docs[0] = 2 * i;
|
||||
reference["foo"] = reference_filter_result_t(1, reference_docs);
|
||||
}
|
||||
}
|
||||
|
||||
@ -796,27 +797,27 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) {
|
||||
filter_result_t::or_filter_results(a, b, result1);
|
||||
|
||||
ASSERT_EQ(3, result1.count);
|
||||
ASSERT_EQ(1, result1.reference_filter_results.size());
|
||||
ASSERT_EQ(1, result1.reference_filter_results.count("foo"));
|
||||
ASSERT_EQ(1, result1.coll_to_references[0].size());
|
||||
ASSERT_EQ(1, result1.coll_to_references[0].count("foo"));
|
||||
|
||||
std::vector<uint32_t> expected = {3, 6, 9}, foo_reference = {6, 12, 18};
|
||||
for (size_t i = 0; i < result1.count; i++) {
|
||||
ASSERT_EQ(expected[i], result1.docs[i]);
|
||||
|
||||
ASSERT_EQ(1, result1.reference_filter_results["foo"][i].count);
|
||||
ASSERT_EQ(foo_reference[i], result1.reference_filter_results["foo"][i].docs[0]);
|
||||
ASSERT_EQ(1, result1.coll_to_references[i].at("foo").count);
|
||||
ASSERT_EQ(foo_reference[i], result1.coll_to_references[i].at("foo").docs[0]);
|
||||
}
|
||||
|
||||
b.count = 9;
|
||||
b.docs = new uint32_t[b.count];
|
||||
b.reference_filter_results["bar"] = new reference_filter_result_t[b.count];
|
||||
b.coll_to_references = new std::map<std::string, reference_filter_result_t>[b.count] {};
|
||||
for (size_t i = 0; i < b.count; i++) {
|
||||
b.docs[i] = i;
|
||||
|
||||
auto& reference = b.reference_filter_results["bar"][i];
|
||||
reference.count = 1;
|
||||
reference.docs = new uint32_t[1];
|
||||
reference.docs[0] = 10 - i;
|
||||
auto& reference = b.coll_to_references[i];
|
||||
auto reference_docs = new uint32_t[1];
|
||||
reference_docs[0] = 10 - i;
|
||||
reference["bar"] = reference_filter_result_t(1, reference_docs);
|
||||
}
|
||||
|
||||
// a.docs: [3, 6, 9], b.docs: [0..8]
|
||||
@ -833,18 +834,18 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) {
|
||||
ASSERT_EQ(expected[i], result2.docs[i]);
|
||||
|
||||
if (foo_map.count(i) != 0) {
|
||||
ASSERT_EQ(1, result2.reference_filter_results["foo"][i].count);
|
||||
ASSERT_EQ(foo_map[i], result2.reference_filter_results["foo"][i].docs[0]);
|
||||
ASSERT_EQ(1, result2.coll_to_references[i].at("foo").count);
|
||||
ASSERT_EQ(foo_map[i], result2.coll_to_references[i].at("foo").docs[0]);
|
||||
} else {
|
||||
// Reference count should be 0 for the docs that were not present in the a result.
|
||||
ASSERT_EQ(0, result2.reference_filter_results["foo"][i].count);
|
||||
// foo didn't have any reference to current doc.
|
||||
ASSERT_EQ(0, result2.coll_to_references[i].count("foo"));
|
||||
}
|
||||
|
||||
if (bar_map.count(i) != 0) {
|
||||
ASSERT_EQ(1, result2.reference_filter_results["bar"][i].count);
|
||||
ASSERT_EQ(bar_map[i], result2.reference_filter_results["bar"][i].docs[0]);
|
||||
ASSERT_EQ(1, result2.coll_to_references[i].at("bar").count);
|
||||
ASSERT_EQ(bar_map[i], result2.coll_to_references[i].at("bar").docs[0]);
|
||||
} else {
|
||||
ASSERT_EQ(0, result2.reference_filter_results["bar"][i].count);
|
||||
ASSERT_EQ(0, result2.coll_to_references[i].count("bar"));
|
||||
}
|
||||
}
|
||||
|
||||
@ -853,15 +854,15 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) {
|
||||
std::map<uint32_t, uint32_t> baz_map = {{0, 2}, {4, 0}, {5, 8}};
|
||||
c.count = baz_map.size();
|
||||
c.docs = new uint32_t[baz_map.size()];
|
||||
c.reference_filter_results["baz"] = new reference_filter_result_t[baz_map.size()];
|
||||
c.coll_to_references = new std::map<std::string, reference_filter_result_t>[baz_map.size()] {};
|
||||
auto j = 0;
|
||||
for(auto i: baz_map) {
|
||||
c.docs[j] = i.first;
|
||||
|
||||
auto& reference = c.reference_filter_results["baz"][j++];
|
||||
reference.count = 1;
|
||||
reference.docs = new uint32_t[1];
|
||||
reference.docs[0] = i.second;
|
||||
auto& reference = c.coll_to_references[j++];
|
||||
auto reference_docs = new uint32_t[1];
|
||||
reference_docs[0] = i.second;
|
||||
reference["baz"] = reference_filter_result_t(1, reference_docs);
|
||||
}
|
||||
|
||||
// b.docs: [0..8], c.docs: [0, 4, 5]
|
||||
@ -873,17 +874,17 @@ TEST_F(CollectionJoinTest, OrFilterResults_WithReferences) {
|
||||
ASSERT_EQ(expected[i], result3.docs[i]);
|
||||
|
||||
if (bar_map.count(i) != 0) {
|
||||
ASSERT_EQ(1, result3.reference_filter_results["bar"][i].count);
|
||||
ASSERT_EQ(bar_map[i], result3.reference_filter_results["bar"][i].docs[0]);
|
||||
ASSERT_EQ(1, result3.coll_to_references[i].at("bar").count);
|
||||
ASSERT_EQ(bar_map[i], result3.coll_to_references[i].at("bar").docs[0]);
|
||||
} else {
|
||||
ASSERT_EQ(0, result3.reference_filter_results["bar"][i].count);
|
||||
ASSERT_EQ(0, result3.coll_to_references[i].count("bar"));
|
||||
}
|
||||
|
||||
if (baz_map.count(i) != 0) {
|
||||
ASSERT_EQ(1, result3.reference_filter_results["baz"][i].count);
|
||||
ASSERT_EQ(baz_map[i], result3.reference_filter_results["baz"][i].docs[0]);
|
||||
ASSERT_EQ(1, result3.coll_to_references[i].at("baz").count);
|
||||
ASSERT_EQ(baz_map[i], result3.coll_to_references[i].at("baz").docs[0]);
|
||||
} else {
|
||||
ASSERT_EQ(0, result3.reference_filter_results["baz"][i].count);
|
||||
ASSERT_EQ(0, result3.coll_to_references[i].count("baz"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user