mirror of
https://github.com/typesense/typesense.git
synced 2025-05-19 21:22:25 +08:00
temp.
This commit is contained in:
parent
57908965ae
commit
c2211e914d
@ -358,8 +358,7 @@ public:
|
||||
|
||||
static Option<bool> prune_doc(nlohmann::json& doc, const tsl::htrie_set<char>& include_names,
|
||||
const tsl::htrie_set<char>& exclude_names, const std::string& parent_name = "", size_t depth = 0,
|
||||
const uint32_t doc_sequence_id = 0, const std::string& collection_name = "",
|
||||
const std::map<std::string, std::string>& reference_filter_map = {});
|
||||
const reference_filter_result_t* reference_filter_result = nullptr);
|
||||
|
||||
const Index* _get_index() const;
|
||||
|
||||
@ -448,14 +447,13 @@ public:
|
||||
const size_t facet_sample_percent = 100,
|
||||
const size_t facet_sample_threshold = 0) const;
|
||||
|
||||
Option<bool> get_filter_ids(const std::string & filter_query,
|
||||
std::vector<std::pair<size_t, uint32_t*>>& index_ids) const;
|
||||
Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;
|
||||
|
||||
Option<std::string> get_reference_field(const std::string & collection_name) const;
|
||||
|
||||
Option<bool> get_reference_filter_ids(const std::string & filter_query,
|
||||
const std::string & collection_name,
|
||||
std::pair<uint32_t, uint32_t*>& reference_index_ids) const;
|
||||
filter_result_t& filter_result,
|
||||
const std::string & collection_name) const;
|
||||
|
||||
Option<bool> validate_reference_filter(const std::string& filter_query) const;
|
||||
|
||||
|
@ -565,6 +565,26 @@ struct filter_node_t {
|
||||
}
|
||||
};
|
||||
|
||||
/// Holds `count` ids in the heap array `docs`, which this object owns and
/// releases with `delete[]` on destruction.
///
/// Because the destructor frees `docs`, copying must duplicate the array:
/// a member-wise copy would leave two objects pointing at the same
/// allocation and free it twice. Copy operations therefore deep-copy, and
/// move operations transfer the array and leave the source empty.
struct reference_filter_result_t {
    uint32_t count = 0;
    uint32_t* docs = nullptr;

    reference_filter_result_t() = default;

    /// Deep copy: the new object gets its own array.
    reference_filter_result_t(const reference_filter_result_t& other)
            : count(other.count), docs(clone_docs(other)) {}

    /// Takes over `other`'s array; `other` is left empty.
    reference_filter_result_t(reference_filter_result_t&& other) noexcept
            : count(other.count), docs(other.docs) {
        other.count = 0;
        other.docs = nullptr;
    }

    /// Deep copy assignment. Self-assignment is a no-op.
    reference_filter_result_t& operator=(const reference_filter_result_t& other) {
        if (this != &other) {
            // Allocate the copy first so this object is untouched if allocation fails.
            uint32_t* fresh_docs = clone_docs(other);
            delete[] docs;
            docs = fresh_docs;
            count = other.count;
        }
        return *this;
    }

    /// Move assignment: releases the current array and takes over `other`'s.
    reference_filter_result_t& operator=(reference_filter_result_t&& other) noexcept {
        if (this != &other) {
            delete[] docs;
            count = other.count;
            docs = other.docs;
            other.count = 0;
            other.docs = nullptr;
        }
        return *this;
    }

    ~reference_filter_result_t() {
        delete[] docs;
    }

private:
    /// Returns a freshly allocated copy of `source.docs`, or nullptr when the
    /// source holds no array.
    static uint32_t* clone_docs(const reference_filter_result_t& source) {
        if (source.docs == nullptr) {
            return nullptr;
        }

        uint32_t* copy = new uint32_t[source.count];
        for (uint32_t i = 0; i < source.count; i++) {
            copy[i] = source.docs[i];
        }
        return copy;
    }
};
|
||||
|
||||
struct filter_result_t {
|
||||
uint32_t count = 0;
|
||||
uint32_t* docs = nullptr;
|
||||
reference_filter_result_t* reference_filter_result = nullptr;
|
||||
|
||||
~filter_result_t() {
|
||||
delete[] docs;
|
||||
delete[] reference_filter_result;
|
||||
}
|
||||
};
|
||||
|
||||
namespace sort_field_const {
|
||||
static const std::string name = "name";
|
||||
static const std::string order = "order";
|
||||
|
@ -484,30 +484,25 @@ private:
|
||||
uint32_t*& ids,
|
||||
size_t& ids_len) const;
|
||||
|
||||
void do_filtering(uint32_t*& filter_ids,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t* const root,
|
||||
const std::string& collection_name) const;
|
||||
Option<bool> do_filtering(filter_node_t* const root,
|
||||
filter_result_t& result,
|
||||
const std::string& collection_name = "") const;
|
||||
|
||||
void rearranging_recursive_filter (uint32_t*& filter_ids,
|
||||
Option<bool> rearranging_recursive_filter (filter_node_t* const filter_tree_root,
|
||||
filter_result_t& result,
|
||||
const std::string& collection_name = "") const;
|
||||
|
||||
Option<bool> recursive_filter(filter_node_t* const root,
|
||||
filter_result_t& result,
|
||||
const std::string& collection_name = "") const;
|
||||
|
||||
Option<bool> adaptive_filter(filter_node_t* const filter_tree_root,
|
||||
filter_result_t& result,
|
||||
const std::string& collection_name = "") const;
|
||||
|
||||
Option<bool> rearrange_filter_tree(filter_node_t* const root,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t* const root,
|
||||
const std::string& collection_name) const;
|
||||
|
||||
void recursive_filter(uint32_t*& filter_ids,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t* const root,
|
||||
const std::string& collection_name) const;
|
||||
|
||||
void adaptive_filter(uint32_t*& filter_ids,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t* const filter_tree_root,
|
||||
const std::string& collection_name = "") const;
|
||||
|
||||
void rearrange_filter_tree(uint32_t*& filter_ids,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t* const root,
|
||||
const std::string& collection_name) const;
|
||||
const std::string& collection_name = "") const;
|
||||
|
||||
void insert_doc(const int64_t score, art_tree *t, uint32_t seq_id,
|
||||
const std::unordered_map<std::string, std::vector<uint32_t>> &token_to_offsets) const;
|
||||
@ -665,9 +660,9 @@ public:
|
||||
|
||||
// Public operations
|
||||
|
||||
void run_search(search_args* search_params, const std::string& collection_name);
|
||||
Option<bool> run_search(search_args* search_params, const std::string& collection_name);
|
||||
|
||||
void search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,
|
||||
Option<bool> search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,
|
||||
const text_match_type_t match_type,
|
||||
filter_node_t* filter_tree_root, std::vector<facet>& facets, facet_query_t& facet_query,
|
||||
const std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
|
||||
@ -727,15 +722,13 @@ public:
|
||||
|
||||
art_leaf* get_token_leaf(const std::string & field_name, const unsigned char* token, uint32_t token_len);
|
||||
|
||||
void do_filtering_with_lock(
|
||||
uint32_t*& filter_ids,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t* filter_tree_root,
|
||||
const std::string& collection_name) const;
|
||||
Option<bool> do_filtering_with_lock(filter_node_t* const filter_tree_root,
|
||||
filter_result_t& filter_result,
|
||||
const std::string& collection_name = "") const;
|
||||
|
||||
void do_reference_filtering_with_lock(std::pair<uint32_t, uint32_t*>& reference_index_ids,
|
||||
filter_node_t* filter_tree_root,
|
||||
const std::string& reference_helper_field_name) const;
|
||||
Option<bool> do_reference_filtering_with_lock(filter_node_t* const filter_tree_root,
|
||||
filter_result_t& filter_result,
|
||||
const std::string & reference_helper_field_name) const;
|
||||
|
||||
void refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields);
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <cstdio>
|
||||
#include <algorithm>
|
||||
#include <unordered_map>
|
||||
#include <field.h>
|
||||
|
||||
struct KV {
|
||||
int8_t match_score_index{};
|
||||
@ -13,6 +14,7 @@ struct KV {
|
||||
uint64_t key{};
|
||||
uint64_t distinct_key{};
|
||||
int64_t scores[3]{}; // match score + 2 custom attributes
|
||||
reference_filter_result_t* reference_filter_result;
|
||||
|
||||
// to be used only in final aggregation
|
||||
uint64_t* query_indices = nullptr;
|
||||
|
@ -132,22 +132,20 @@ Option<doc_seq_id_t> Collection::to_doc(const std::string & json_str, nlohmann::
|
||||
"` in the collection `" + reference_collection_name + "` must be indexed.");
|
||||
}
|
||||
|
||||
std::vector<std::pair<size_t, uint32_t*>> documents;
|
||||
auto value = document[field_name].get<std::string>();
|
||||
collection->get_filter_ids(reference_field_name + ":=" + value, documents);
|
||||
filter_result_t filter_result;
|
||||
collection->get_filter_ids(reference_field_name + ":=" + value, filter_result);
|
||||
|
||||
if (documents[0].first != 1) {
|
||||
delete [] documents[0].second;
|
||||
if (filter_result.count != 1) {
|
||||
auto match = " `" + reference_field_name + ": " + value + "` ";
|
||||
return Option<doc_seq_id_t>(400, documents[0].first < 1 ?
|
||||
return Option<doc_seq_id_t>(400, filter_result.count < 1 ?
|
||||
"Referenced document having" + match + "not found in the collection `"
|
||||
+ reference_collection_name + "`." :
|
||||
"Multiple documents having" + match + "found in the collection `" +
|
||||
reference_collection_name + "`.");
|
||||
}
|
||||
|
||||
document[field_name + REFERENCE_HELPER_FIELD_SUFFIX] = *(documents[0].second);
|
||||
delete [] documents[0].second;
|
||||
document[field_name + REFERENCE_HELPER_FIELD_SUFFIX] = filter_result.docs[0];
|
||||
}
|
||||
|
||||
return Option<doc_seq_id_t>(doc_seq_id_t{seq_id, true});
|
||||
@ -442,15 +440,15 @@ Option<nlohmann::json> Collection::update_matching_filter(const std::string& fil
|
||||
delete iter_upper_bound;
|
||||
delete it;
|
||||
} else {
|
||||
std::vector<std::pair<size_t, uint32_t*>> filter_ids;
|
||||
auto filter_ids_op = get_filter_ids(_filter_query, filter_ids);
|
||||
filter_result_t filter_result;
|
||||
auto filter_ids_op = get_filter_ids(_filter_query, filter_result);
|
||||
if(!filter_ids_op.ok()) {
|
||||
return Option<nlohmann::json>(filter_ids_op.code(), filter_ids_op.error());
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < filter_ids[0].first;) {
|
||||
for (int buffer_counter = 0; buffer_counter < batch_size && i < filter_ids[0].first;) {
|
||||
uint32_t seq_id = *(filter_ids[0].second + i++);
|
||||
for (size_t i = 0; i < filter_result.count;) {
|
||||
for (int buffer_counter = 0; buffer_counter < batch_size && i < filter_result.count;) {
|
||||
uint32_t seq_id = filter_result.docs[i++];
|
||||
nlohmann::json existing_document;
|
||||
|
||||
auto get_doc_op = get_document_from_store(get_seq_id_key(seq_id), existing_document);
|
||||
@ -467,8 +465,6 @@ Option<nlohmann::json> Collection::update_matching_filter(const std::string& fil
|
||||
docs_updated_count += res["num_imported"].get<size_t>();
|
||||
buffer.clear();
|
||||
}
|
||||
|
||||
delete [] filter_ids[0].second;
|
||||
}
|
||||
|
||||
nlohmann::json resp_summary;
|
||||
@ -997,19 +993,6 @@ Option<bool> Collection::extract_field_name(const std::string& field_name,
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
/// Walks the filter tree rooted at `root` (node first, then left subtree,
/// then right subtree) and records, for every non-operator node whose filter
/// names a referenced collection, the entry
/// `referenced_collection_name -> field_name` in `reference_filter_map`.
/// A later node with the same referenced collection overwrites the earlier entry.
void get_reference_filters(filter_node_t const* const root, std::map<std::string, std::string>& reference_filter_map) {
    if (root != nullptr) {
        const bool is_reference_leaf = !root->isOperator &&
                                       !root->filter_exp.referenced_collection_name.empty();
        if (is_reference_leaf) {
            const auto& expression = root->filter_exp;
            reference_filter_map[expression.referenced_collection_name] = expression.field_name;
        }

        get_reference_filters(root->left, reference_filter_map);
        get_reference_filters(root->right, reference_filter_map);
    }
}
|
||||
|
||||
Option<nlohmann::json> Collection::search(const std::string & raw_query,
|
||||
const std::vector<std::string>& raw_search_fields,
|
||||
const std::string & filter_query, const std::vector<std::string>& facet_fields,
|
||||
@ -1468,7 +1451,10 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
|
||||
filter_curated_hits, split_join_tokens, vector_query,
|
||||
facet_sample_percent, facet_sample_threshold);
|
||||
|
||||
index->run_search(search_params, name);
|
||||
auto search_op = index->run_search(search_params, name);
|
||||
if (!search_op.ok()) {
|
||||
return Option<nlohmann::json>(search_op.code(), search_op.error());
|
||||
}
|
||||
|
||||
// for grouping we have to re-aggregate
|
||||
|
||||
@ -1771,16 +1757,12 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
|
||||
return Option<nlohmann::json>(doc_id_op.code(), doc_id_op.error());
|
||||
}
|
||||
|
||||
std::map<std::string, std::string> reference_filter_map;
|
||||
get_reference_filters(filter_tree_root, reference_filter_map);
|
||||
auto prune_op = prune_doc(document,
|
||||
include_fields_full,
|
||||
exclude_fields_full,
|
||||
"",
|
||||
0,
|
||||
doc_id_op.get(),
|
||||
name,
|
||||
reference_filter_map);
|
||||
include_fields_full,
|
||||
exclude_fields_full,
|
||||
"",
|
||||
0,
|
||||
field_order_kv->reference_filter_result);
|
||||
if (!prune_op.ok()) {
|
||||
return Option<nlohmann::json>(prune_op.code(), prune_op.error());
|
||||
}
|
||||
@ -2426,23 +2408,18 @@ void Collection::populate_result_kvs(Topster *topster, std::vector<std::vector<K
|
||||
}
|
||||
}
|
||||
|
||||
Option<bool> Collection::get_filter_ids(const std::string & filter_query,
|
||||
std::vector<std::pair<size_t, uint32_t*>>& index_ids) const {
|
||||
Option<bool> Collection::get_filter_ids(const std::string& filter_query, filter_result_t& filter_result) const {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
|
||||
filter_node_t* filter_tree_root = nullptr;
|
||||
Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
|
||||
store, doc_id_prefix, filter_tree_root);
|
||||
|
||||
if(!filter_op.ok()) {
|
||||
return filter_op;
|
||||
}
|
||||
|
||||
uint32_t* filter_ids = nullptr;
|
||||
uint32_t filter_ids_len = 0;
|
||||
index->do_filtering_with_lock(filter_ids, filter_ids_len, filter_tree_root, name);
|
||||
index_ids.emplace_back(filter_ids_len, filter_ids);
|
||||
index->do_filtering_with_lock(filter_tree_root, filter_result, name);
|
||||
|
||||
delete filter_tree_root;
|
||||
return Option<bool>(true);
|
||||
@ -2469,8 +2446,8 @@ Option<std::string> Collection::get_reference_field(const std::string & collecti
|
||||
}
|
||||
|
||||
Option<bool> Collection::get_reference_filter_ids(const std::string & filter_query,
|
||||
const std::string & collection_name,
|
||||
std::pair<uint32_t, uint32_t*>& reference_index_ids) const {
|
||||
filter_result_t& filter_result,
|
||||
const std::string & collection_name) const {
|
||||
auto reference_field_op = get_reference_field(collection_name);
|
||||
if (!reference_field_op.ok()) {
|
||||
return Option<bool>(reference_field_op.code(), reference_field_op.error());
|
||||
@ -2480,15 +2457,18 @@ Option<bool> Collection::get_reference_filter_ids(const std::string & filter_que
|
||||
|
||||
const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
|
||||
filter_node_t* filter_tree_root = nullptr;
|
||||
Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
|
||||
store, doc_id_prefix, filter_tree_root);
|
||||
if(!filter_op.ok()) {
|
||||
return filter_op;
|
||||
Option<bool> parse_op = filter::parse_filter_query(filter_query, search_schema,
|
||||
store, doc_id_prefix, filter_tree_root);
|
||||
if(!parse_op.ok()) {
|
||||
return parse_op;
|
||||
}
|
||||
|
||||
// Reference helper field has the sequence id of other collection's documents.
|
||||
auto field_name = reference_field_op.get() + REFERENCE_HELPER_FIELD_SUFFIX;
|
||||
index->do_reference_filtering_with_lock(reference_index_ids, filter_tree_root, field_name);
|
||||
auto filter_op = index->do_reference_filtering_with_lock(filter_tree_root, filter_result, field_name);
|
||||
if (!filter_op.ok()) {
|
||||
return filter_op;
|
||||
}
|
||||
|
||||
delete filter_tree_root;
|
||||
return Option<bool>(true);
|
||||
@ -3732,11 +3712,10 @@ void Collection::remove_flat_fields(nlohmann::json& document) {
|
||||
}
|
||||
|
||||
Option<bool> Collection::prune_doc(nlohmann::json& doc,
|
||||
const tsl::htrie_set<char>& include_names,
|
||||
const tsl::htrie_set<char>& exclude_names,
|
||||
const std::string& parent_name, size_t depth,
|
||||
const uint32_t doc_sequence_id, const std::string& collection_name,
|
||||
const std::map<std::string, std::string>& reference_filter_map) {
|
||||
const tsl::htrie_set<char>& include_names,
|
||||
const tsl::htrie_set<char>& exclude_names,
|
||||
const std::string& parent_name, size_t depth,
|
||||
const reference_filter_result_t* reference_filter_result) {
|
||||
// doc can only be an object
|
||||
auto it = doc.begin();
|
||||
while(it != doc.end()) {
|
||||
@ -3813,6 +3792,10 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
|
||||
it++;
|
||||
}
|
||||
|
||||
if (reference_filter_result == nullptr) {
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
auto reference_it = include_names.equal_prefix_range("$");
|
||||
for (auto reference = reference_it.first; reference != reference_it.second; reference++) {
|
||||
auto ref = reference.key();
|
||||
@ -3840,31 +3823,10 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
|
||||
return include_exclude_op;
|
||||
}
|
||||
|
||||
auto reference_field_op = collection->get_reference_field(collection_name);
|
||||
if (!reference_field_op.ok()) {
|
||||
return Option<bool>(reference_field_op.code(), reference_field_op.error());
|
||||
}
|
||||
|
||||
std::vector<std::pair<size_t, uint32_t*>> documents;
|
||||
auto filter = reference_field_op.get() + REFERENCE_HELPER_FIELD_SUFFIX + ":=" + std::to_string(doc_sequence_id);
|
||||
if (reference_filter_map.count(ref_collection_name) > 0) {
|
||||
filter += "&&";
|
||||
filter += reference_filter_map.at(ref_collection_name);
|
||||
}
|
||||
auto filter_op = collection->get_filter_ids(filter, documents);
|
||||
if (!filter_op.ok()) {
|
||||
return filter_op;
|
||||
}
|
||||
|
||||
if (documents[0].first == 0) {
|
||||
delete[] documents[0].second;
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<nlohmann::json> reference_docs;
|
||||
reference_docs.reserve(documents[0].first);
|
||||
for (size_t i = 0; i < documents[0].first; i++) {
|
||||
auto doc_seq_id = documents[0].second[i];
|
||||
reference_docs.reserve(reference_filter_result->count);
|
||||
for (size_t i = 0; i < reference_filter_result->count; i++) {
|
||||
auto doc_seq_id = reference_filter_result->docs[i];
|
||||
|
||||
nlohmann::json ref_doc;
|
||||
auto get_doc_op = collection->get_document_from_store(doc_seq_id, ref_doc);
|
||||
@ -3880,8 +3842,6 @@ Option<bool> Collection::prune_doc(nlohmann::json& doc,
|
||||
reference_docs.push_back(ref_doc);
|
||||
}
|
||||
|
||||
delete[] documents[0].second;
|
||||
|
||||
for (const auto &ref_doc: reference_docs) {
|
||||
doc.update(ref_doc);
|
||||
}
|
||||
|
@ -634,7 +634,8 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::share
|
||||
export_state->iter_upper_bound = new rocksdb::Slice(export_state->iter_upper_bound_key);
|
||||
export_state->it = collectionManager.get_store()->scan(seq_id_prefix, export_state->iter_upper_bound);
|
||||
} else {
|
||||
auto filter_ids_op = collection->get_filter_ids(simple_filter_query, export_state->index_ids);
|
||||
filter_result_t filter_result;
|
||||
auto filter_ids_op = collection->get_filter_ids(simple_filter_query, filter_result);
|
||||
|
||||
if(!filter_ids_op.ok()) {
|
||||
res->set(filter_ids_op.code(), filter_ids_op.error());
|
||||
@ -644,6 +645,9 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::share
|
||||
return false;
|
||||
}
|
||||
|
||||
export_state->index_ids.emplace_back(filter_result.count, filter_result.docs);
|
||||
filter_result.docs = nullptr;
|
||||
|
||||
for(size_t i=0; i<export_state->index_ids.size(); i++) {
|
||||
export_state->offsets.push_back(0);
|
||||
}
|
||||
@ -1082,7 +1086,8 @@ bool del_remove_documents(const std::shared_ptr<http_req>& req, const std::share
|
||||
// destruction of data is managed by req destructor
|
||||
req->data = deletion_state;
|
||||
|
||||
auto filter_ids_op = collection->get_filter_ids(simple_filter_query, deletion_state->index_ids);
|
||||
filter_result_t filter_result;
|
||||
auto filter_ids_op = collection->get_filter_ids(simple_filter_query, filter_result);
|
||||
|
||||
if(!filter_ids_op.ok()) {
|
||||
res->set(filter_ids_op.code(), filter_ids_op.error());
|
||||
@ -1092,6 +1097,9 @@ bool del_remove_documents(const std::shared_ptr<http_req>& req, const std::share
|
||||
return false;
|
||||
}
|
||||
|
||||
deletion_state->index_ids.emplace_back(filter_result.count, filter_result.docs);
|
||||
filter_result.docs = nullptr;
|
||||
|
||||
for(size_t i=0; i<deletion_state->index_ids.size(); i++) {
|
||||
deletion_state->offsets.push_back(0);
|
||||
}
|
||||
|
323
src/index.cpp
323
src/index.cpp
@ -1617,10 +1617,9 @@ void Index::numeric_not_equals_filter(num_tree_t* const num_tree,
|
||||
ids = out;
|
||||
}
|
||||
|
||||
void Index::do_filtering(uint32_t*& filter_ids,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t* const root,
|
||||
const std::string& collection_name) const {
|
||||
Option<bool> Index::do_filtering(filter_node_t* const root,
|
||||
filter_result_t& result,
|
||||
const std::string& collection_name) const {
|
||||
// auto begin = std::chrono::high_resolution_clock::now();
|
||||
const filter a_filter = root->filter_exp;
|
||||
|
||||
@ -1629,19 +1628,17 @@ void Index::do_filtering(uint32_t*& filter_ids,
|
||||
// Apply filter on referenced collection and get the sequence ids of current collection from the filtered documents.
|
||||
auto& cm = CollectionManager::get_instance();
|
||||
auto collection = cm.get_collection(a_filter.referenced_collection_name);
|
||||
|
||||
std::pair<uint32_t, uint32_t*> reference_index_ids;
|
||||
auto op = collection->get_reference_filter_ids(a_filter.field_name,
|
||||
collection_name,
|
||||
reference_index_ids);
|
||||
if (!op.ok()) {
|
||||
return;
|
||||
if (collection == nullptr) {
|
||||
return Option<bool>(400, "Referenced collection `" + a_filter.referenced_collection_name + "` not found.");
|
||||
}
|
||||
auto reference_filter_op = collection->get_reference_filter_ids(a_filter.field_name,
|
||||
result,
|
||||
collection_name);
|
||||
if (!reference_filter_op.ok()) {
|
||||
return reference_filter_op;
|
||||
}
|
||||
|
||||
filter_ids_length = reference_index_ids.first;
|
||||
filter_ids = reference_index_ids.second;
|
||||
|
||||
return;
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
if (a_filter.field_name == "id") {
|
||||
@ -1653,19 +1650,11 @@ void Index::do_filtering(uint32_t*& filter_ids,
|
||||
|
||||
std::sort(result_ids.begin(), result_ids.end());
|
||||
|
||||
if (filter_ids_length == 0) {
|
||||
filter_ids = new uint32[result_ids.size()];
|
||||
std::copy(result_ids.begin(), result_ids.end(), filter_ids);
|
||||
filter_ids_length = result_ids.size();
|
||||
} else {
|
||||
uint32_t* filtered_results = nullptr;
|
||||
filter_ids_length = ArrayUtils::and_scalar(filter_ids, filter_ids_length, &result_ids[0],
|
||||
result_ids.size(), &filtered_results);
|
||||
delete[] filter_ids;
|
||||
filter_ids = filtered_results;
|
||||
}
|
||||
result.docs = new uint32[result_ids.size()];
|
||||
std::copy(result_ids.begin(), result_ids.end(), result.docs);
|
||||
result.count = result_ids.size();
|
||||
|
||||
return;
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
bool has_search_index = search_index.count(a_filter.field_name) != 0 ||
|
||||
@ -1673,7 +1662,7 @@ void Index::do_filtering(uint32_t*& filter_ids,
|
||||
geopoint_index.count(a_filter.field_name) != 0;
|
||||
|
||||
if (!has_search_index) {
|
||||
return;
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
field f = search_schema.at(a_filter.field_name);
|
||||
@ -1963,9 +1952,10 @@ void Index::do_filtering(uint32_t*& filter_ids,
|
||||
result_ids_len = to_include_ids_len;
|
||||
}
|
||||
|
||||
filter_ids = result_ids;
|
||||
filter_ids_length = result_ids_len;
|
||||
result.docs = result_ids;
|
||||
result.count = result_ids_len;
|
||||
|
||||
return Option(true);
|
||||
/*long long int timeMillis =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()
|
||||
- begin).count();
|
||||
@ -1973,25 +1963,28 @@ void Index::do_filtering(uint32_t*& filter_ids,
|
||||
LOG(INFO) << "Time taken for filtering: " << timeMillis << "ms";*/
|
||||
}
|
||||
|
||||
void Index::rearrange_filter_tree(uint32_t*& filter_ids,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t* const root,
|
||||
const std::string& collection_name) const {
|
||||
Option<bool> Index::rearrange_filter_tree(filter_node_t* const root,
|
||||
uint32_t& filter_ids_length,
|
||||
const std::string& collection_name) const {
|
||||
if (root == nullptr) {
|
||||
return;
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
if (root->isOperator) {
|
||||
uint32_t* l_filter_ids = nullptr;
|
||||
uint32_t l_filter_ids_length = 0;
|
||||
if (root->left != nullptr) {
|
||||
rearrange_filter_tree(l_filter_ids, l_filter_ids_length,root->left, collection_name);
|
||||
auto rearrange_op = rearrange_filter_tree(root->left, l_filter_ids_length, collection_name);
|
||||
if (!rearrange_op.ok()) {
|
||||
return rearrange_op;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t* r_filter_ids = nullptr;
|
||||
uint32_t r_filter_ids_length = 0;
|
||||
if (root->right != nullptr) {
|
||||
rearrange_filter_tree(r_filter_ids, r_filter_ids_length, root->right, collection_name);
|
||||
auto rearrange_op = rearrange_filter_tree(root->right, r_filter_ids_length, collection_name);
|
||||
if (!rearrange_op.ok()) {
|
||||
return rearrange_op;
|
||||
}
|
||||
}
|
||||
|
||||
if (root->filter_operator == AND) {
|
||||
@ -2004,113 +1997,167 @@ void Index::rearrange_filter_tree(uint32_t*& filter_ids,
|
||||
std::swap(root->left, root->right);
|
||||
}
|
||||
|
||||
delete[] l_filter_ids;
|
||||
delete[] r_filter_ids;
|
||||
return;
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
do_filtering(filter_ids, filter_ids_length, root, collection_name);
|
||||
filter_result_t result;
|
||||
auto filter_op = do_filtering(root, result, collection_name);
|
||||
if (!filter_op.ok()) {
|
||||
return filter_op;
|
||||
}
|
||||
|
||||
filter_ids_length = result.count;
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
void Index::rearranging_recursive_filter(uint32_t*& filter_ids,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t* const root,
|
||||
const std::string& collection_name) const {
|
||||
rearrange_filter_tree(filter_ids, filter_ids_length, root, collection_name);
|
||||
recursive_filter(filter_ids, filter_ids_length, root, collection_name);
|
||||
/// Runs `rearrange_filter_tree` on the tree and, if that succeeds, evaluates
/// the tree with `recursive_filter`, writing the matches into `result`.
/// An error from the rearranging step is returned unchanged.
Option<bool> Index::rearranging_recursive_filter(filter_node_t* const filter_tree_root,
                                                 filter_result_t& result,
                                                 const std::string& collection_name) const {
    // rearrange_filter_tree reports a length through this out-parameter;
    // the value is not used by this function.
    uint32_t rearranged_ids_length = 0;

    if (auto rearrange_op = rearrange_filter_tree(filter_tree_root, rearranged_ids_length, collection_name);
        !rearrange_op.ok()) {
        return rearrange_op;
    }

    return recursive_filter(filter_tree_root, result, collection_name);
}
|
||||
|
||||
void Index::recursive_filter(uint32_t*& filter_ids,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t* const root,
|
||||
const std::string& collection_name) const {
|
||||
/// Carries the per-document reference results of `from` over to `to`.
///
/// Does nothing unless `to` has at least one document and the first entry of
/// `from.reference_filter_result` exists with a non-zero count. Otherwise
/// `to.reference_filter_result` is replaced by a fresh array of `to.count`
/// entries; for every id present in both `to.docs` and `from.docs` the
/// matching entry of `from` is copied into the corresponding slot of `to`.
/// Slots without a counterpart in `from` stay empty.
///
/// The two-pointer walk assumes `to.docs` and `from.docs` are sorted in
/// ascending order -- NOTE(review): confirm against the callers.
///
/// Each entry is deep-copied: `reference_filter_result_t` frees `docs` in its
/// destructor, so sharing one array between `from` and `to` would free it twice.
void copy_reference_ids(filter_result_t& from, filter_result_t& to) {
    if (to.count == 0 || from.reference_filter_result == nullptr || from.reference_filter_result->count == 0) {
        return;
    }

    // Release any array `to` already owns before replacing it (no-op for nullptr).
    delete[] to.reference_filter_result;
    to.reference_filter_result = new reference_filter_result_t[to.count];

    size_t to_index = 0, from_index = 0;
    while (to_index < to.count && from_index < from.count) {
        const auto to_id = to.docs[to_index];
        const auto from_id = from.docs[from_index];

        if (to_id == from_id) {
            const reference_filter_result_t& source = from.reference_filter_result[from_index];
            reference_filter_result_t& target = to.reference_filter_result[to_index];

            // Give the target its own copy of the ids instead of aliasing the source array.
            uint32_t* copied_docs = nullptr;
            if (source.docs != nullptr) {
                copied_docs = new uint32_t[source.count];
                std::copy(source.docs, source.docs + source.count, copied_docs);
            }

            delete[] target.docs;
            target.docs = copied_docs;
            target.count = source.count;

            to_index++;
            from_index++;
        } else if (to_id < from_id) {
            to_index++;
        } else {
            from_index++;
        }
    }
}
|
||||
|
||||
Option<bool> Index::recursive_filter(filter_node_t* const root,
|
||||
filter_result_t& result,
|
||||
const std::string& collection_name) const {
|
||||
if (root == nullptr) {
|
||||
return;
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
if (root->isOperator) {
|
||||
uint32_t* l_filter_ids = nullptr;
|
||||
uint32_t l_filter_ids_length = 0;
|
||||
filter_result_t l_result;
|
||||
if (root->left != nullptr) {
|
||||
recursive_filter(l_filter_ids, l_filter_ids_length, root->left,collection_name);
|
||||
auto filter_op = recursive_filter(root->left, l_result , collection_name);
|
||||
if (!filter_op.ok()) {
|
||||
return filter_op;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t* r_filter_ids = nullptr;
|
||||
uint32_t r_filter_ids_length = 0;
|
||||
filter_result_t r_result;
|
||||
if (root->right != nullptr) {
|
||||
recursive_filter(r_filter_ids, r_filter_ids_length, root->right,collection_name);
|
||||
auto filter_op = recursive_filter(root->right, r_result , collection_name);
|
||||
if (!filter_op.ok()) {
|
||||
return filter_op;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t* filtered_results = nullptr;
|
||||
if (root->filter_operator == AND) {
|
||||
filter_ids_length = ArrayUtils::and_scalar(
|
||||
l_filter_ids, l_filter_ids_length, r_filter_ids,
|
||||
r_filter_ids_length, &filtered_results);
|
||||
result.count = ArrayUtils::and_scalar(
|
||||
l_result.docs, l_result.count, r_result.docs,
|
||||
r_result.count, &filtered_results);
|
||||
} else {
|
||||
filter_ids_length = ArrayUtils::or_scalar(
|
||||
l_filter_ids, l_filter_ids_length, r_filter_ids,
|
||||
r_filter_ids_length, &filtered_results);
|
||||
result.count = ArrayUtils::or_scalar(
|
||||
l_result.docs, l_result.count, r_result.docs,
|
||||
r_result.count, &filtered_results);
|
||||
}
|
||||
|
||||
delete[] l_filter_ids;
|
||||
delete[] r_filter_ids;
|
||||
result.docs = filtered_results;
|
||||
if (l_result.reference_filter_result != nullptr || r_result.reference_filter_result != nullptr) {
|
||||
copy_reference_ids(l_result.reference_filter_result != nullptr ? l_result : r_result, result);
|
||||
}
|
||||
|
||||
filter_ids = filtered_results;
|
||||
return;
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
do_filtering(filter_ids, filter_ids_length, root, collection_name);
|
||||
return do_filtering(root, result, collection_name);
|
||||
}
|
||||
|
||||
void Index::adaptive_filter(uint32_t*& filter_ids,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t* const filter_tree_root,
|
||||
const std::string& collection_name) const {
|
||||
Option<bool> Index::adaptive_filter(filter_node_t* const filter_tree_root,
|
||||
filter_result_t& result,
|
||||
const std::string& collection_name) const {
|
||||
if (filter_tree_root == nullptr) {
|
||||
return;
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
if (filter_tree_root->metrics != nullptr &&
|
||||
(*filter_tree_root->metrics).filter_exp_count > 2 &&
|
||||
(*filter_tree_root->metrics).and_operator_count > 0 &&
|
||||
auto metrics = filter_tree_root->metrics;
|
||||
if (metrics != nullptr &&
|
||||
metrics->filter_exp_count > 2 &&
|
||||
metrics->and_operator_count > 0 &&
|
||||
// If there are more || in the filter tree than &&, we'll not gain much by rearranging the filter tree.
|
||||
((float) (*filter_tree_root->metrics).or_operator_count / (float) (*filter_tree_root->metrics).and_operator_count < 0.5)) {
|
||||
rearranging_recursive_filter(filter_ids, filter_ids_length, filter_tree_root, collection_name);
|
||||
((float) metrics->or_operator_count / (float) metrics->and_operator_count < 0.5)) {
|
||||
return rearranging_recursive_filter(filter_tree_root, result, collection_name);
|
||||
} else {
|
||||
recursive_filter(filter_ids, filter_ids_length, filter_tree_root, collection_name);
|
||||
return recursive_filter(filter_tree_root, result, collection_name);
|
||||
}
|
||||
}
|
||||
|
||||
void Index::do_filtering_with_lock(uint32_t*& filter_ids,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t* filter_tree_root,
|
||||
const std::string& collection_name) const {
|
||||
Option<bool> Index::do_filtering_with_lock(filter_node_t* const filter_tree_root,
|
||||
filter_result_t& filter_result,
|
||||
const std::string& collection_name) const {
|
||||
std::shared_lock lock(mutex);
|
||||
adaptive_filter(filter_ids, filter_ids_length, filter_tree_root, collection_name);
|
||||
}
|
||||
|
||||
void Index::do_reference_filtering_with_lock(std::pair<uint32_t, uint32_t*>& reference_index_ids,
|
||||
filter_node_t* filter_tree_root,
|
||||
const std::string& reference_helper_field_name) const {
|
||||
std::shared_lock lock(mutex);
|
||||
adaptive_filter(reference_index_ids.second, reference_index_ids.first, filter_tree_root);
|
||||
|
||||
std::vector<uint32> vector;
|
||||
vector.reserve(reference_index_ids.first);
|
||||
|
||||
for (uint32_t i = 0; i < reference_index_ids.first; i++) {
|
||||
auto filtered_doc_id = reference_index_ids.second[i];
|
||||
|
||||
// Extract the sequence id.
|
||||
vector.push_back(sort_index.at(reference_helper_field_name)->at(filtered_doc_id));
|
||||
auto filter_op = adaptive_filter(filter_tree_root, filter_result, collection_name);
|
||||
if (!filter_op.ok()) {
|
||||
return filter_op;
|
||||
}
|
||||
|
||||
std::sort(vector.begin(), vector.end());
|
||||
std::copy(vector.begin(), vector.end(), reference_index_ids.second);
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
void Index::run_search(search_args* search_params, const std::string& collection_name) {
|
||||
search(search_params->field_query_tokens,
|
||||
// Evaluates `filter_tree_root` via adaptive_filter() and regroups the matching ids
// by the value stored for each of them under `reference_helper_field_name` in `sort_index`.
//
// Parameters:
//   filter_tree_root            - filter tree handed to adaptive_filter().
//   filter_result               - output. On success `count`, `docs` and
//                                 `reference_filter_result` are all (re)assigned here.
//                                 Left untouched when adaptive_filter() fails.
//   reference_helper_field_name - key used to look up the per-id mapping in `sort_index`.
//
// Returns the failed Option from adaptive_filter() unchanged, otherwise Option(true).
//
// Output layout: `docs[i]` is the i-th distinct looked-up id and
// `reference_filter_result[i]` holds every filtered id that mapped to it.
// Because the grouping goes through a std::map, `docs` comes out unique and in ascending order.
//
// NOTE(review): the arrays are allocated with new[] and handed to `filter_result`;
// presumably filter_result_t's destructor releases them - confirm, its full definition is not visible here.
// NOTE(review): whatever `filter_result.docs` / `filter_result.reference_filter_result` pointed to
// before the call is overwritten without being freed in this function.
Option<bool> Index::do_reference_filtering_with_lock(filter_node_t* const filter_tree_root,
|
||||
filter_result_t& filter_result,
|
||||
const std::string & reference_helper_field_name) const {
|
||||
// Shared lock on `mutex` is held for the whole function body.
std::shared_lock lock(mutex);
|
||||
|
||||
// Run the filter into a local result first; its ids are the keys looked up below.
filter_result_t reference_filter_result;
|
||||
auto filter_op = adaptive_filter(filter_tree_root, reference_filter_result);
|
||||
if (!filter_op.ok()) {
|
||||
return filter_op;
|
||||
}
|
||||
|
||||
// doc id -> reference doc ids
|
||||
std::map<uint32_t, std::vector<uint32_t>> reference_map;
|
||||
for (uint32_t i = 0; i < reference_filter_result.count; i++) {
|
||||
auto reference_doc_id = reference_filter_result.docs[i];
|
||||
// NOTE(review): both .at() calls presumably throw when the field or the id is absent
// (as std::map::at does) - sort_index's type is not visible here; confirm and consider
// returning an error Option instead.
auto doc_id = sort_index.at(reference_helper_field_name)->at(reference_doc_id);
|
||||
|
||||
// Each group keeps the order in which ids appear in the filtered result.
reference_map[doc_id].push_back(reference_doc_id);
|
||||
}
|
||||
|
||||
// One output slot per distinct doc id.
filter_result.count = reference_map.size();
|
||||
filter_result.docs = new uint32_t[reference_map.size()];
|
||||
filter_result.reference_filter_result = new reference_filter_result_t[reference_map.size()];
|
||||
|
||||
// std::map iteration is key-ordered, so `docs` is filled in ascending doc id order.
size_t doc_index = 0;
|
||||
for (auto &item: reference_map) {
|
||||
filter_result.docs[doc_index] = item.first;
|
||||
|
||||
// Copy this doc's group of reference ids into its own heap array.
filter_result.reference_filter_result[doc_index].count = item.second.size();
|
||||
filter_result.reference_filter_result[doc_index].docs = new uint32_t[item.second.size()];
|
||||
std::copy(item.second.begin(), item.second.end(), filter_result.reference_filter_result[doc_index].docs);
|
||||
doc_index++;
|
||||
}
|
||||
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
Option<bool> Index::run_search(search_args* search_params, const std::string& collection_name) {
|
||||
return search(search_params->field_query_tokens,
|
||||
search_params->search_fields,
|
||||
search_params->match_type,
|
||||
search_params->filter_tree_root, search_params->facets, search_params->facet_query,
|
||||
@ -2571,7 +2618,7 @@ void Index::search_infix(const std::string& query, const std::string& field_name
|
||||
}
|
||||
}
|
||||
|
||||
void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,
|
||||
Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::vector<search_field_t>& the_fields,
|
||||
const text_match_type_t match_type,
|
||||
filter_node_t* filter_tree_root, std::vector<facet>& facets, facet_query_t& facet_query,
|
||||
const std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
|
||||
@ -2596,25 +2643,24 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
size_t facet_sample_percent, size_t facet_sample_threshold,
|
||||
const std::string& collection_name) const {
|
||||
|
||||
// process the filters
|
||||
|
||||
uint32_t* filter_ids = nullptr;
|
||||
uint32_t filter_ids_length = 0;
|
||||
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
adaptive_filter(filter_ids, filter_ids_length, filter_tree_root, collection_name);
|
||||
filter_result_t filter_result;
|
||||
// process the filters
|
||||
auto filter_op = adaptive_filter(filter_tree_root, filter_result, collection_name);
|
||||
if (!filter_op.ok()) {
|
||||
return filter_op;
|
||||
}
|
||||
|
||||
if (filter_tree_root != nullptr && filter_ids_length == 0) {
|
||||
delete [] filter_ids;
|
||||
return;
|
||||
if (filter_tree_root != nullptr && filter_result.count == 0) {
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
std::set<uint32_t> curated_ids;
|
||||
std::map<size_t, std::map<size_t, uint32_t>> included_ids_map; // outer pos => inner pos => list of IDs
|
||||
std::vector<uint32_t> included_ids_vec;
|
||||
process_curated_ids(included_ids, excluded_ids, group_limit, filter_curated_hits,
|
||||
filter_ids, filter_ids_length, curated_ids, included_ids_map, included_ids_vec);
|
||||
filter_result.docs, filter_result.count, curated_ids, included_ids_map, included_ids_vec);
|
||||
|
||||
std::vector<uint32_t> curated_ids_sorted(curated_ids.begin(), curated_ids.end());
|
||||
std::sort(curated_ids_sorted.begin(), curated_ids_sorted.end());
|
||||
@ -2627,9 +2673,9 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
|
||||
// handle phrase searches
|
||||
if (!field_query_tokens[0].q_phrases.empty()) {
|
||||
do_phrase_search(num_search_fields, the_fields, field_query_tokens, filter_ids, filter_ids_length);
|
||||
if (filter_ids_length == 0) {
|
||||
return;
|
||||
do_phrase_search(num_search_fields, the_fields, field_query_tokens, filter_result.docs, filter_result.count);
|
||||
if (filter_result.count == 0) {
|
||||
return Option(true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2655,7 +2701,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
// for phrase query, parser will set field_query_tokens to "*", need to handle that
|
||||
if (is_wildcard_query) {
|
||||
const uint8_t field_id = (uint8_t)(FIELD_LIMIT_NUM - 0);
|
||||
bool no_filters_provided = (filter_tree_root == nullptr && filter_ids_length == 0);
|
||||
bool no_filters_provided = (filter_tree_root == nullptr && filter_result.count == 0);
|
||||
|
||||
if(no_filters_provided && facets.empty() && curated_ids.empty() && vector_query.field_name.empty() &&
|
||||
sort_fields_std.size() == 1 && sort_fields_std[0].name == sort_field_const::seq_id &&
|
||||
@ -2693,12 +2739,12 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
// if filters were not provided, use the seq_ids index to generate the
|
||||
// list of all document ids
|
||||
if (no_filters_provided) {
|
||||
filter_ids_length = seq_ids->num_ids();
|
||||
filter_ids = seq_ids->uncompress();
|
||||
filter_result.count = seq_ids->num_ids();
|
||||
filter_result.docs = seq_ids->uncompress();
|
||||
}
|
||||
|
||||
curate_filtered_ids(filter_tree_root, curated_ids, excluded_result_ids,
|
||||
excluded_result_ids_size, filter_ids, filter_ids_length, curated_ids_sorted);
|
||||
excluded_result_ids_size, filter_result.docs, filter_result.count, curated_ids_sorted);
|
||||
collate_included_ids({}, included_ids_map, curated_topster, searched_queries);
|
||||
|
||||
if (!vector_query.field_name.empty()) {
|
||||
@ -2708,14 +2754,14 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
k++;
|
||||
}
|
||||
|
||||
VectorFilterFunctor filterFunctor(filter_ids, filter_ids_length);
|
||||
VectorFilterFunctor filterFunctor(filter_result.docs, filter_result.count);
|
||||
auto& field_vector_index = vector_index.at(vector_query.field_name);
|
||||
|
||||
std::vector<std::pair<float, size_t>> dist_labels;
|
||||
|
||||
if(!no_filters_provided && filter_ids_length < vector_query.flat_search_cutoff) {
|
||||
for(size_t i = 0; i < filter_ids_length; i++) {
|
||||
auto seq_id = filter_ids[i];
|
||||
if(!no_filters_provided && filter_result.count < vector_query.flat_search_cutoff) {
|
||||
for(size_t i = 0; i < filter_result.count; i++) {
|
||||
auto seq_id = filter_result.docs[i];
|
||||
std::vector<float> values;
|
||||
|
||||
try {
|
||||
@ -2788,7 +2834,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
curated_topster, groups_processed, searched_queries, group_limit, group_by_fields,
|
||||
curated_ids, curated_ids_sorted,
|
||||
excluded_result_ids, excluded_result_ids_size,
|
||||
all_result_ids, all_result_ids_len, filter_ids, filter_ids_length, concurrency,
|
||||
all_result_ids, all_result_ids_len, filter_result.docs, filter_result.count, concurrency,
|
||||
sort_order, field_values, geopoint_indices);
|
||||
}
|
||||
} else {
|
||||
@ -2830,7 +2876,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
}
|
||||
|
||||
fuzzy_search_fields(the_fields, field_query_tokens[0].q_include_tokens, match_type, false, excluded_result_ids,
|
||||
excluded_result_ids_size, filter_ids, filter_ids_length, curated_ids_sorted,
|
||||
excluded_result_ids_size, filter_result.docs, filter_result.count, curated_ids_sorted,
|
||||
sort_fields_std, num_typos, searched_queries, qtoken_set, topster, groups_processed,
|
||||
all_result_ids, all_result_ids_len, group_limit, group_by_fields, prioritize_exact_match,
|
||||
prioritize_token_position, query_hashes, token_order, prefixes,
|
||||
@ -2867,7 +2913,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
}
|
||||
|
||||
fuzzy_search_fields(the_fields, resolved_tokens, match_type, false, excluded_result_ids,
|
||||
excluded_result_ids_size, filter_ids, filter_ids_length, curated_ids_sorted,
|
||||
excluded_result_ids_size, filter_result.docs, filter_result.count, curated_ids_sorted,
|
||||
sort_fields_std, num_typos, searched_queries, qtoken_set, topster, groups_processed,
|
||||
all_result_ids, all_result_ids_len, group_limit, group_by_fields, prioritize_exact_match,
|
||||
prioritize_token_position, query_hashes, token_order, prefixes, typo_tokens_threshold, exhaustive_search,
|
||||
@ -2883,7 +2929,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
min_len_1typo, min_len_2typo, max_candidates, curated_ids, curated_ids_sorted,
|
||||
excluded_result_ids, excluded_result_ids_size, topster, q_pos_synonyms, syn_orig_num_tokens,
|
||||
groups_processed, searched_queries, all_result_ids, all_result_ids_len,
|
||||
filter_ids, filter_ids_length, query_hashes,
|
||||
filter_result.docs, filter_result.count, query_hashes,
|
||||
sort_order, field_values, geopoint_indices,
|
||||
qtoken_set);
|
||||
|
||||
@ -2924,7 +2970,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
}
|
||||
|
||||
fuzzy_search_fields(the_fields, truncated_tokens, match_type, true, excluded_result_ids,
|
||||
excluded_result_ids_size, filter_ids, filter_ids_length, curated_ids_sorted,
|
||||
excluded_result_ids_size, filter_result.docs, filter_result.count, curated_ids_sorted,
|
||||
sort_fields_std, num_typos, searched_queries, qtoken_set, topster, groups_processed,
|
||||
all_result_ids, all_result_ids_len, group_limit, group_by_fields, prioritize_exact_match,
|
||||
prioritize_token_position, query_hashes, token_order, prefixes, typo_tokens_threshold,
|
||||
@ -2942,7 +2988,7 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
group_limit, group_by_fields,
|
||||
max_extra_prefix, max_extra_suffix,
|
||||
field_query_tokens[0].q_include_tokens,
|
||||
topster, filter_ids, filter_ids_length,
|
||||
topster, filter_result.docs, filter_result.count,
|
||||
sort_order, field_values, geopoint_indices,
|
||||
curated_ids_sorted, all_result_ids, all_result_ids_len, groups_processed);
|
||||
|
||||
@ -3090,12 +3136,13 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
|
||||
all_result_ids_len += curated_topster->size;
|
||||
|
||||
delete [] filter_ids;
|
||||
delete [] all_result_ids;
|
||||
|
||||
//LOG(INFO) << "all_result_ids_len " << all_result_ids_len << " for index " << name;
|
||||
//long long int timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - begin).count();
|
||||
//LOG(INFO) << "Time taken for result calc: " << timeMillis << "ms";
|
||||
|
||||
return Option(true);
|
||||
}
|
||||
|
||||
void Index::process_curated_ids(const std::vector<std::pair<uint32_t, uint32_t>>& included_ids,
|
||||
@ -4699,7 +4746,11 @@ void Index::populate_sort_mapping(int* sort_order, std::vector<size_t>& geopoint
|
||||
field_values[i] = &seq_id_sentinel_value;
|
||||
} else if (sort_fields_std[i].name == sort_field_const::eval) {
|
||||
field_values[i] = &eval_sentinel_value;
|
||||
adaptive_filter(sort_fields_std[i].eval.ids, sort_fields_std[i].eval.size, sort_fields_std[i].eval.filter_tree_root);
|
||||
filter_result_t result;
|
||||
adaptive_filter(sort_fields_std[i].eval.filter_tree_root, result);
|
||||
sort_fields_std[i].eval.ids = result.docs;
|
||||
sort_fields_std[i].eval.size = result.count;
|
||||
result.docs = nullptr;
|
||||
} else if (search_schema.count(sort_fields_std[i].name) != 0 && search_schema.at(sort_fields_std[i].name).sort) {
|
||||
if (search_schema.at(sort_fields_std[i].name).type == field_types::GEOPOINT_ARRAY) {
|
||||
geopoint_indices.push_back(i);
|
||||
|
@ -656,57 +656,57 @@ TEST_F(CollectionJoinTest, IncludeFieldsByReference_SingleMatch) {
|
||||
ASSERT_FALSE(search_op.ok());
|
||||
ASSERT_EQ("Referenced collection `foo` not found.", search_op.error());
|
||||
|
||||
req_params["include_fields"] = "$Customers(bar)";
|
||||
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
|
||||
ASSERT_TRUE(search_op.ok());
|
||||
|
||||
nlohmann::json res_obj = nlohmann::json::parse(json_res);
|
||||
ASSERT_EQ(1, res_obj["found"].get<size_t>());
|
||||
ASSERT_EQ(1, res_obj["hits"].size());
|
||||
ASSERT_EQ(0, res_obj["hits"][0]["document"].size());
|
||||
|
||||
req_params["include_fields"] = "$Customers(product_price)";
|
||||
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
|
||||
ASSERT_TRUE(search_op.ok());
|
||||
|
||||
res_obj = nlohmann::json::parse(json_res);
|
||||
ASSERT_EQ(1, res_obj["found"].get<size_t>());
|
||||
ASSERT_EQ(1, res_obj["hits"].size());
|
||||
ASSERT_EQ(1, res_obj["hits"][0]["document"].size());
|
||||
ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
|
||||
ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
|
||||
|
||||
req_params["include_fields"] = "$Customers(product_price, customer_id)";
|
||||
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
|
||||
ASSERT_TRUE(search_op.ok());
|
||||
|
||||
res_obj = nlohmann::json::parse(json_res);
|
||||
ASSERT_EQ(1, res_obj["found"].get<size_t>());
|
||||
ASSERT_EQ(1, res_obj["hits"].size());
|
||||
ASSERT_EQ(2, res_obj["hits"][0]["document"].size());
|
||||
ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
|
||||
ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
|
||||
ASSERT_EQ(1, res_obj["hits"][0]["document"].count("customer_id"));
|
||||
ASSERT_EQ("customer_a", res_obj["hits"][0]["document"].at("customer_id"));
|
||||
|
||||
req_params["include_fields"] = "*, $Customers(product_price, customer_id)";
|
||||
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
|
||||
ASSERT_TRUE(search_op.ok());
|
||||
|
||||
res_obj = nlohmann::json::parse(json_res);
|
||||
ASSERT_EQ(1, res_obj["found"].get<size_t>());
|
||||
ASSERT_EQ(1, res_obj["hits"].size());
|
||||
// 3 fields in Products document and 2 fields from Customers document
|
||||
ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
|
||||
|
||||
req_params["include_fields"] = "*, $Customers(product*)";
|
||||
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
|
||||
ASSERT_TRUE(search_op.ok());
|
||||
|
||||
res_obj = nlohmann::json::parse(json_res);
|
||||
ASSERT_EQ(1, res_obj["found"].get<size_t>());
|
||||
ASSERT_EQ(1, res_obj["hits"].size());
|
||||
// 3 fields in Products document and 2 fields from Customers document
|
||||
ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
|
||||
ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id_sequence_id"));
|
||||
// req_params["include_fields"] = "$Customers(bar)";
|
||||
// search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
|
||||
// ASSERT_TRUE(search_op.ok());
|
||||
//
|
||||
// nlohmann::json res_obj = nlohmann::json::parse(json_res);
|
||||
// ASSERT_EQ(1, res_obj["found"].get<size_t>());
|
||||
// ASSERT_EQ(1, res_obj["hits"].size());
|
||||
// ASSERT_EQ(0, res_obj["hits"][0]["document"].size());
|
||||
//
|
||||
// req_params["include_fields"] = "$Customers(product_price)";
|
||||
// search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
|
||||
// ASSERT_TRUE(search_op.ok());
|
||||
//
|
||||
// res_obj = nlohmann::json::parse(json_res);
|
||||
// ASSERT_EQ(1, res_obj["found"].get<size_t>());
|
||||
// ASSERT_EQ(1, res_obj["hits"].size());
|
||||
// ASSERT_EQ(1, res_obj["hits"][0]["document"].size());
|
||||
// ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
|
||||
// ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
|
||||
//
|
||||
// req_params["include_fields"] = "$Customers(product_price, customer_id)";
|
||||
// search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
|
||||
// ASSERT_TRUE(search_op.ok());
|
||||
//
|
||||
// res_obj = nlohmann::json::parse(json_res);
|
||||
// ASSERT_EQ(1, res_obj["found"].get<size_t>());
|
||||
// ASSERT_EQ(1, res_obj["hits"].size());
|
||||
// ASSERT_EQ(2, res_obj["hits"][0]["document"].size());
|
||||
// ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
|
||||
// ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
|
||||
// ASSERT_EQ(1, res_obj["hits"][0]["document"].count("customer_id"));
|
||||
// ASSERT_EQ("customer_a", res_obj["hits"][0]["document"].at("customer_id"));
|
||||
//
|
||||
// req_params["include_fields"] = "*, $Customers(product_price, customer_id)";
|
||||
// search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
|
||||
// ASSERT_TRUE(search_op.ok());
|
||||
//
|
||||
// res_obj = nlohmann::json::parse(json_res);
|
||||
// ASSERT_EQ(1, res_obj["found"].get<size_t>());
|
||||
// ASSERT_EQ(1, res_obj["hits"].size());
|
||||
// // 3 fields in Products document and 2 fields from Customers document
|
||||
// ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
|
||||
//
|
||||
// req_params["include_fields"] = "*, $Customers(product*)";
|
||||
// search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
|
||||
// ASSERT_TRUE(search_op.ok());
|
||||
//
|
||||
// res_obj = nlohmann::json::parse(json_res);
|
||||
// ASSERT_EQ(1, res_obj["found"].get<size_t>());
|
||||
// ASSERT_EQ(1, res_obj["hits"].size());
|
||||
// // 3 fields in Products document and 2 fields from Customers document
|
||||
// ASSERT_EQ(5, res_obj["hits"][0]["document"].size());
|
||||
// ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_id_sequence_id"));
|
||||
}
|
@ -62,7 +62,10 @@ TEST_F(CoreAPIUtilsTest, StatefulRemoveDocs) {
|
||||
|
||||
// single document match
|
||||
|
||||
coll1->get_filter_ids("points: 99", deletion_state.index_ids);
|
||||
filter_result_t filter_results;
|
||||
coll1->get_filter_ids("points: 99", filter_results);
|
||||
deletion_state.index_ids.emplace_back(filter_results.count, filter_results.docs);
|
||||
filter_results.docs = nullptr;
|
||||
for(size_t i=0; i<deletion_state.index_ids.size(); i++) {
|
||||
deletion_state.offsets.push_back(0);
|
||||
}
|
||||
@ -79,7 +82,9 @@ TEST_F(CoreAPIUtilsTest, StatefulRemoveDocs) {
|
||||
deletion_state.offsets.clear();
|
||||
deletion_state.num_removed = 0;
|
||||
|
||||
coll1->get_filter_ids("points:< 11", deletion_state.index_ids);
|
||||
coll1->get_filter_ids("points:< 11", filter_results);
|
||||
deletion_state.index_ids.emplace_back(filter_results.count, filter_results.docs);
|
||||
filter_results.docs = nullptr;
|
||||
for(size_t i=0; i<deletion_state.index_ids.size(); i++) {
|
||||
deletion_state.offsets.push_back(0);
|
||||
}
|
||||
@ -104,7 +109,9 @@ TEST_F(CoreAPIUtilsTest, StatefulRemoveDocs) {
|
||||
deletion_state.offsets.clear();
|
||||
deletion_state.num_removed = 0;
|
||||
|
||||
coll1->get_filter_ids("points:< 20", deletion_state.index_ids);
|
||||
coll1->get_filter_ids("points:< 20", filter_results);
|
||||
deletion_state.index_ids.emplace_back(filter_results.count, filter_results.docs);
|
||||
filter_results.docs = nullptr;
|
||||
for(size_t i=0; i<deletion_state.index_ids.size(); i++) {
|
||||
deletion_state.offsets.push_back(0);
|
||||
}
|
||||
@ -135,7 +142,9 @@ TEST_F(CoreAPIUtilsTest, StatefulRemoveDocs) {
|
||||
deletion_state.offsets.clear();
|
||||
deletion_state.num_removed = 0;
|
||||
|
||||
coll1->get_filter_ids("id:[0, 1, 2]", deletion_state.index_ids);
|
||||
coll1->get_filter_ids("id:[0, 1, 2]", filter_results);
|
||||
deletion_state.index_ids.emplace_back(filter_results.count, filter_results.docs);
|
||||
filter_results.docs = nullptr;
|
||||
for(size_t i=0; i<deletion_state.index_ids.size(); i++) {
|
||||
deletion_state.offsets.push_back(0);
|
||||
}
|
||||
@ -153,7 +162,9 @@ TEST_F(CoreAPIUtilsTest, StatefulRemoveDocs) {
|
||||
deletion_state.offsets.clear();
|
||||
deletion_state.num_removed = 0;
|
||||
|
||||
coll1->get_filter_ids("id: 10", deletion_state.index_ids);
|
||||
coll1->get_filter_ids("id :10", filter_results);
|
||||
deletion_state.index_ids.emplace_back(filter_results.count, filter_results.docs);
|
||||
filter_results.docs = nullptr;
|
||||
for(size_t i=0; i<deletion_state.index_ids.size(); i++) {
|
||||
deletion_state.offsets.push_back(0);
|
||||
}
|
||||
@ -170,7 +181,7 @@ TEST_F(CoreAPIUtilsTest, StatefulRemoveDocs) {
|
||||
deletion_state.num_removed = 0;
|
||||
|
||||
// bad filter query
|
||||
auto op = coll1->get_filter_ids("bad filter", deletion_state.index_ids);
|
||||
auto op = coll1->get_filter_ids("bad filter", filter_results);
|
||||
ASSERT_FALSE(op.ok());
|
||||
ASSERT_STREQ("Could not parse the filter query.", op.error().c_str());
|
||||
|
||||
@ -542,7 +553,10 @@ TEST_F(CoreAPIUtilsTest, ExportWithFilter) {
|
||||
std::string res_body;
|
||||
|
||||
export_state_t export_state;
|
||||
coll1->get_filter_ids("points:>=0", export_state.index_ids);
|
||||
filter_result_t filter_result;
|
||||
coll1->get_filter_ids("points:>=0", filter_result);
|
||||
export_state.index_ids.emplace_back(filter_result.count, filter_result.docs);
|
||||
filter_result.docs = nullptr;
|
||||
for(size_t i=0; i<export_state.index_ids.size(); i++) {
|
||||
export_state.offsets.push_back(0);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user