mirror of
https://github.com/typesense/typesense.git
synced 2025-05-21 22:33:27 +08:00
Optimize reference filtering.
This commit is contained in:
parent
ebfbf4f48d
commit
6c5662bc95
@ -436,6 +436,10 @@ public:
|
||||
Option<bool> get_filter_ids(const std::string & filter_query,
|
||||
std::vector<std::pair<size_t, uint32_t*>>& index_ids) const;
|
||||
|
||||
Option<bool> get_reference_filter_ids(const std::string & filter_query,
|
||||
const std::string & collection_name,
|
||||
std::pair<uint32_t, uint32_t*>& reference_index_ids) const;
|
||||
|
||||
Option<bool> validate_reference_filter(const std::string& filter_query) const;
|
||||
|
||||
Option<nlohmann::json> get(const std::string & id) const;
|
||||
|
@ -715,6 +715,10 @@ public:
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t const* const& filter_tree_root) const;
|
||||
|
||||
void do_reference_filtering_with_lock(std::pair<uint32_t, uint32_t*>& reference_index_ids,
|
||||
filter_node_t const* const& filter_tree_root,
|
||||
const std::string& reference_field_name) const;
|
||||
|
||||
void refresh_schemas(const std::vector<field>& new_fields, const std::vector<field>& del_fields);
|
||||
|
||||
// the following methods are not synchronized because their parent calls are synchronized or they are const/static
|
||||
|
@ -2362,6 +2362,41 @@ Option<bool> Collection::get_filter_ids(const std::string & filter_query,
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
Option<bool> Collection::get_reference_filter_ids(const std::string & filter_query,
|
||||
const std::string & collection_name,
|
||||
std::pair<uint32_t, uint32_t*>& reference_index_ids) const {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
std::string reference_field_name;
|
||||
for (auto const& field: fields) {
|
||||
if (!field.reference.empty() &&
|
||||
field.reference.find(collection_name) == 0 &&
|
||||
field.reference.find('.') == collection_name.size()) {
|
||||
reference_field_name = field.name;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (reference_field_name.empty()) {
|
||||
return Option<bool>(400, "Could not find any field in `" + name + "` referencing the collection `"
|
||||
+ collection_name + "`.");
|
||||
}
|
||||
|
||||
const std::string doc_id_prefix = std::to_string(collection_id) + "_" + DOC_ID_PREFIX + "_";
|
||||
filter_node_t* filter_tree_root = nullptr;
|
||||
Option<bool> filter_op = filter::parse_filter_query(filter_query, search_schema,
|
||||
store, doc_id_prefix, filter_tree_root);
|
||||
if(!filter_op.ok()) {
|
||||
return filter_op;
|
||||
}
|
||||
|
||||
reference_field_name += "_sequence_id";
|
||||
index->do_reference_filtering_with_lock(reference_index_ids, filter_tree_root, reference_field_name);
|
||||
|
||||
delete filter_tree_root;
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
Option<bool> Collection::validate_reference_filter(const std::string& filter_query) const {
|
||||
std::shared_lock lock(mutex);
|
||||
|
||||
|
@ -723,7 +723,7 @@ Option<bool> field::json_field_to_field(bool enable_nested_fields, nlohmann::jso
|
||||
|
||||
if (!field_json[fields::reference].get<std::string>().empty()) {
|
||||
the_fields.emplace_back(
|
||||
field(field_json[fields::name].get<std::string>() + "_sequence_id", "string", false,
|
||||
field(field_json[fields::name].get<std::string>() + "_sequence_id", "int64", false,
|
||||
field_json[fields::optional], true)
|
||||
);
|
||||
}
|
||||
|
@ -470,7 +470,7 @@ Option<uint32_t> Index::validate_index_in_memory(nlohmann::json& document, uint3
|
||||
"Multiple documents having" + match + "found in the collection `" + tokens[0] + "`.");
|
||||
}
|
||||
|
||||
document[a_field.name + "_sequence_id"] = StringUtils::serialize_uint32_t(*(documents[0].second));
|
||||
document[a_field.name + "_sequence_id"] = *(documents[0].second);
|
||||
|
||||
delete [] documents[0].second;
|
||||
}
|
||||
@ -1665,7 +1665,7 @@ void Index::do_filtering(uint32_t*& filter_ids,
|
||||
uint32_t& filter_ids_length,
|
||||
filter_node_t const* const root) const {
|
||||
// auto begin = std::chrono::high_resolution_clock::now();
|
||||
const filter a_filter = root->filter_exp;
|
||||
/**/ const filter a_filter = root->filter_exp;
|
||||
|
||||
bool is_referenced_filter = !a_filter.referenced_collection_name.empty();
|
||||
if (is_referenced_filter) {
|
||||
@ -1673,48 +1673,16 @@ void Index::do_filtering(uint32_t*& filter_ids,
|
||||
auto& cm = CollectionManager::get_instance();
|
||||
auto collection = cm.get_collection(a_filter.referenced_collection_name);
|
||||
|
||||
std::vector<std::pair<size_t, uint32_t*>> documents;
|
||||
auto op = collection->get_filter_ids(a_filter.field_name, documents);
|
||||
std::pair<uint32_t, uint32_t*> documents;
|
||||
auto op = collection->get_reference_filter_ids(a_filter.field_name,
|
||||
cm.get_collection_with_id(collection_id)->get_name(),
|
||||
documents);
|
||||
if (!op.ok()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (documents[0].first > 0) {
|
||||
const field* reference_field = nullptr;
|
||||
for (auto const& f: collection->get_fields()) {
|
||||
auto this_collection_name = cm.get_collection_with_id(collection_id)->get_name();
|
||||
if (!f.reference.empty() &&
|
||||
f.reference.find(this_collection_name) == 0 &&
|
||||
f.reference.find('.') == this_collection_name.size()) {
|
||||
reference_field = &f;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (reference_field == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<uint32> result_ids;
|
||||
for (size_t i = 0; i < documents[0].first; i++) {
|
||||
uint32_t seq_id = *(documents[0].second + i);
|
||||
|
||||
nlohmann::json document;
|
||||
auto op = collection->get_document_from_store(seq_id, document);
|
||||
if (!op.ok()) {
|
||||
return;
|
||||
}
|
||||
|
||||
result_ids.push_back(StringUtils::deserialize_uint32_t(document[reference_field->name + "_sequence_id"].get<std::string>()));
|
||||
}
|
||||
|
||||
filter_ids = new uint32[result_ids.size()];
|
||||
std::sort(result_ids.begin(), result_ids.end());
|
||||
std::copy(result_ids.begin(), result_ids.end(), filter_ids);
|
||||
filter_ids_length = result_ids.size();
|
||||
}
|
||||
|
||||
delete [] documents[0].second;
|
||||
filter_ids_length = documents.first;
|
||||
filter_ids = documents.second;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2099,6 +2067,26 @@ void Index::do_filtering_with_lock(uint32_t*& filter_ids,
|
||||
recursive_filter(filter_ids, filter_ids_length, filter_tree_root, false);
|
||||
}
|
||||
|
||||
void Index::do_reference_filtering_with_lock(std::pair<uint32_t, uint32_t*>& reference_index_ids,
|
||||
filter_node_t const* const& filter_tree_root,
|
||||
const std::string& reference_field_name) const {
|
||||
std::shared_lock lock(mutex);
|
||||
recursive_filter(reference_index_ids.second, reference_index_ids.first, filter_tree_root, false);
|
||||
|
||||
std::vector<uint32> vector;
|
||||
vector.reserve(reference_index_ids.first);
|
||||
|
||||
for (uint32_t i = 0; i < reference_index_ids.first; i++) {
|
||||
auto filtered_doc_id = *(reference_index_ids.second + i);
|
||||
|
||||
// Extract the sequence_id from the reference field.
|
||||
vector.push_back(sort_index.at(reference_field_name)->at(filtered_doc_id));
|
||||
}
|
||||
|
||||
std::sort(vector.begin(), vector.end());
|
||||
std::copy(vector.begin(), vector.end(), reference_index_ids.second);
|
||||
}
|
||||
|
||||
void Index::run_search(search_args* search_params) {
|
||||
search(search_params->field_query_tokens,
|
||||
search_params->search_fields,
|
||||
|
@ -101,18 +101,6 @@ TEST_F(CollectionJoinTest, SchemaReferenceField) {
|
||||
}
|
||||
|
||||
TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
|
||||
auto products_schema_json =
|
||||
R"({
|
||||
"name": "Products",
|
||||
"fields": [
|
||||
{"name": "product_id", "type": "string", "index": false, "optional": true},
|
||||
{"name": "product_name", "type": "string"},
|
||||
{"name": "product_description", "type": "string"}
|
||||
]
|
||||
})"_json;
|
||||
auto collection_create_op = collectionManager.create_collection(products_schema_json);
|
||||
ASSERT_TRUE(collection_create_op.ok());
|
||||
|
||||
auto customers_schema_json =
|
||||
R"({
|
||||
"name": "Customers",
|
||||
@ -123,7 +111,7 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
|
||||
{"name": "product_id", "type": "string", "reference": "foo"}
|
||||
]
|
||||
})"_json;
|
||||
collection_create_op = collectionManager.create_collection(customers_schema_json);
|
||||
auto collection_create_op = collectionManager.create_collection(customers_schema_json);
|
||||
ASSERT_TRUE(collection_create_op.ok());
|
||||
|
||||
nlohmann::json customer_json = R"({
|
||||
@ -134,9 +122,9 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
|
||||
})"_json;
|
||||
|
||||
auto customer_collection = collection_create_op.get();
|
||||
auto add_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_FALSE(add_op.ok());
|
||||
ASSERT_EQ("Invalid reference `foo`.", add_op.error());
|
||||
auto add_doc_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_FALSE(add_doc_op.ok());
|
||||
ASSERT_EQ("Invalid reference `foo`.", add_doc_op.error());
|
||||
collectionManager.drop_collection("Customers");
|
||||
|
||||
customers_schema_json =
|
||||
@ -153,9 +141,9 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
|
||||
ASSERT_TRUE(collection_create_op.ok());
|
||||
|
||||
customer_collection = collection_create_op.get();
|
||||
add_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_FALSE(add_op.ok());
|
||||
ASSERT_EQ("Referenced collection `products` not found.", add_op.error());
|
||||
add_doc_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_FALSE(add_doc_op.ok());
|
||||
ASSERT_EQ("Referenced collection `products` not found.", add_doc_op.error());
|
||||
collectionManager.drop_collection("Customers");
|
||||
|
||||
customers_schema_json =
|
||||
@ -170,11 +158,23 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
|
||||
})"_json;
|
||||
collection_create_op = collectionManager.create_collection(customers_schema_json);
|
||||
ASSERT_TRUE(collection_create_op.ok());
|
||||
|
||||
customer_collection = collection_create_op.get();
|
||||
add_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_FALSE(add_op.ok());
|
||||
ASSERT_EQ("Referenced field `id` not found in the collection `Products`.", add_op.error());
|
||||
|
||||
auto products_schema_json =
|
||||
R"({
|
||||
"name": "Products",
|
||||
"fields": [
|
||||
{"name": "product_id", "type": "string", "index": false, "optional": true},
|
||||
{"name": "product_name", "type": "string"},
|
||||
{"name": "product_description", "type": "string"}
|
||||
]
|
||||
})"_json;
|
||||
collection_create_op = collectionManager.create_collection(products_schema_json);
|
||||
ASSERT_TRUE(collection_create_op.ok());
|
||||
|
||||
add_doc_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_FALSE(add_doc_op.ok());
|
||||
ASSERT_EQ("Referenced field `id` not found in the collection `Products`.", add_doc_op.error());
|
||||
collectionManager.drop_collection("Customers");
|
||||
|
||||
customers_schema_json =
|
||||
@ -191,9 +191,9 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
|
||||
ASSERT_TRUE(collection_create_op.ok());
|
||||
|
||||
customer_collection = collection_create_op.get();
|
||||
add_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_FALSE(add_op.ok());
|
||||
ASSERT_EQ("Referenced field `product_id` in the collection `Products` must be indexed.", add_op.error());
|
||||
add_doc_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_FALSE(add_doc_op.ok());
|
||||
ASSERT_EQ("Referenced field `product_id` in the collection `Products` must be indexed.", add_doc_op.error());
|
||||
|
||||
collectionManager.drop_collection("Products");
|
||||
products_schema_json =
|
||||
@ -208,8 +208,8 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
|
||||
collection_create_op = collectionManager.create_collection(products_schema_json);
|
||||
ASSERT_TRUE(collection_create_op.ok());
|
||||
|
||||
add_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_EQ("Referenced document having `product_id` = `a` not found in the collection `Products`.", add_op.error());
|
||||
add_doc_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_EQ("Referenced document having `product_id` = `a` not found in the collection `Products`.", add_doc_op.error());
|
||||
|
||||
std::vector<nlohmann::json> products = {
|
||||
R"({
|
||||
@ -232,8 +232,8 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
|
||||
}
|
||||
|
||||
customer_json["product_id"] = "product_a";
|
||||
add_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_EQ("Multiple documents having `product_id` = `product_a` found in the collection `Products`.", add_op.error());
|
||||
add_doc_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_EQ("Multiple documents having `product_id` = `product_a` found in the collection `Products`.", add_doc_op.error());
|
||||
|
||||
collectionManager.drop_collection("Products");
|
||||
products[1]["product_id"] = "product_b";
|
||||
@ -268,15 +268,17 @@ TEST_F(CollectionJoinTest, IndexDocumentHavingReferenceField) {
|
||||
})"_json;
|
||||
collection_create_op = collectionManager.create_collection(customers_schema_json);
|
||||
ASSERT_TRUE(collection_create_op.ok());
|
||||
add_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_TRUE(add_op.ok());
|
||||
|
||||
customer_collection = collection_create_op.get();
|
||||
add_doc_op = customer_collection->add(customer_json.dump());
|
||||
ASSERT_TRUE(add_doc_op.ok());
|
||||
ASSERT_EQ(customer_collection->get("0").get().count("product_id_sequence_id"), 1);
|
||||
|
||||
// Referenced document should be accessible from Customers collection.
|
||||
auto sequence_id = collectionManager.get_collection("Products")->get_seq_id_collection_prefix() + "_" +
|
||||
customer_collection->get("0").get()["product_id_sequence_id"].get<std::string>();
|
||||
nlohmann::json document;
|
||||
auto get_op = customer_collection->get_document_from_store(sequence_id, document);
|
||||
// Referenced document's sequence_id must be valid.
|
||||
auto get_op = collectionManager.get_collection("Products")->get_document_from_store(
|
||||
customer_collection->get("0").get()["product_id_sequence_id"].get<uint32_t>(),
|
||||
document);
|
||||
ASSERT_TRUE(get_op.ok());
|
||||
ASSERT_EQ(document.count("product_id"), 1);
|
||||
ASSERT_EQ(document["product_id"], "product_a");
|
||||
|
@ -74,9 +74,11 @@ TEST_F(CollectionManagerTest, CollectionCreation) {
|
||||
ASSERT_EQ(0, collection1->get_collection_id());
|
||||
ASSERT_EQ(0, collection1->get_next_seq_id());
|
||||
ASSERT_EQ(facet_fields_expected, collection1->get_facet_fields());
|
||||
ASSERT_EQ(2, collection1->get_sort_fields().size());
|
||||
// product_id_sequence_id is also included
|
||||
ASSERT_EQ(3, collection1->get_sort_fields().size());
|
||||
ASSERT_EQ("location", collection1->get_sort_fields()[0].name);
|
||||
ASSERT_EQ("points", collection1->get_sort_fields()[1].name);
|
||||
ASSERT_EQ("product_id_sequence_id", collection1->get_sort_fields()[1].name);
|
||||
ASSERT_EQ("points", collection1->get_sort_fields()[2].name);
|
||||
ASSERT_EQ(schema.size(), collection1->get_schema().size());
|
||||
ASSERT_EQ("points", collection1->get_default_sorting_field());
|
||||
ASSERT_EQ(false, schema.at("not_stored").index);
|
||||
@ -234,8 +236,8 @@ TEST_F(CollectionManagerTest, CollectionCreation) {
|
||||
"name":"product_id_sequence_id",
|
||||
"nested":false,
|
||||
"optional":true,
|
||||
"sort":false,
|
||||
"type":"string"
|
||||
"sort":true,
|
||||
"type":"int64"
|
||||
}
|
||||
],
|
||||
"id":0,
|
||||
@ -473,9 +475,11 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
|
||||
ASSERT_EQ(0, collection1->get_collection_id());
|
||||
ASSERT_EQ(18, collection1->get_next_seq_id());
|
||||
ASSERT_EQ(facet_fields_expected, collection1->get_facet_fields());
|
||||
ASSERT_EQ(2, collection1->get_sort_fields().size());
|
||||
// product_id_sequence_id is also included
|
||||
ASSERT_EQ(3, collection1->get_sort_fields().size());
|
||||
ASSERT_EQ("location", collection1->get_sort_fields()[0].name);
|
||||
ASSERT_EQ("points", collection1->get_sort_fields()[1].name);
|
||||
ASSERT_EQ("product_id_sequence_id", collection1->get_sort_fields()[1].name);
|
||||
ASSERT_EQ("points", collection1->get_sort_fields()[2].name);
|
||||
ASSERT_EQ(schema.size(), collection1->get_schema().size());
|
||||
ASSERT_EQ("points", collection1->get_default_sorting_field());
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user