diff --git a/include/collection.h b/include/collection.h index 3ea53fdc..94e06f3c 100644 --- a/include/collection.h +++ b/include/collection.h @@ -133,6 +133,11 @@ private: /// "field name" -> reference_pair(referenced_collection_name, referenced_field_name) spp::sparse_hash_map reference_fields; + /// Contains the info where the current collection is referenced. + /// Useful to perform operations such as cascading delete. + /// collection_name -> field_name + spp::sparse_hash_map referenced_in; + // Keep index as the last field since it is initialized in the constructor via init_index(). Add a new field before it. Index* index; diff --git a/src/collection.cpp b/src/collection.cpp index 193fa41c..1d9a8a50 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -3430,6 +3430,23 @@ void Collection::remove_document(const nlohmann::json & document, const uint32_t store->remove(get_doc_id_key(id)); store->remove(get_seq_id_key(seq_id)); } + + if (referenced_in.empty()) { + return; + } + + CollectionManager& collectionManager = CollectionManager::get_instance(); + // Cascade delete all the references. 
+ for (const auto &item: referenced_in) { + auto ref_coll = collectionManager.get_collection(item.first); + if (ref_coll != nullptr) { + filter_result_t filter_result; + ref_coll->get_filter_ids(item.second + ":=" + id, filter_result); + for (uint32_t i = 0; i < filter_result.count; i++) { + ref_coll->remove(std::to_string(filter_result.docs[i])); + } + } + } } Option Collection::remove(const std::string & id, const bool remove_from_store) { @@ -4691,10 +4708,17 @@ Index* Collection::init_index() { if(!field.reference.empty()) { auto dot_index = field.reference.find('.'); - auto collection_name = field.reference.substr(0, dot_index); - auto field_name = field.reference.substr(dot_index + 1); + auto ref_coll_name = field.reference.substr(0, dot_index); + auto ref_field_name = field.reference.substr(dot_index + 1); - reference_fields.emplace(field.name, reference_pair(collection_name, field_name)); + reference_fields.emplace(field.name, reference_pair(ref_coll_name, ref_field_name)); + + auto& collectionManager = CollectionManager::get_instance(); + auto ref_coll = collectionManager.get_collection(ref_coll_name); + if (ref_coll != nullptr) { + // Passing reference helper field helps perform operation on doc_id instead of field value. 
+ ref_coll->referenced_in.emplace(name, field.name + REFERENCE_HELPER_FIELD_SUFFIX); + } } } diff --git a/src/field.cpp b/src/field.cpp index d7263659..eb951070 100644 --- a/src/field.cpp +++ b/src/field.cpp @@ -642,167 +642,3 @@ Option field::validate_and_init_embed_fields(const std::vector(true); } - -void filter_result_t::and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) { - auto lenA = a.count, lenB = b.count; - if (lenA == 0 || lenB == 0) { - return; - } - - result.docs = new uint32_t[std::min(lenA, lenB)]; - - auto A = a.docs, B = b.docs, out = result.docs; - const uint32_t *endA = A + lenA; - const uint32_t *endB = B + lenB; - - // Add an entry of references in the result for each unique collection in a and b. - for (auto const& item: a.reference_filter_results) { - if (result.reference_filter_results.count(item.first) == 0) { - result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)]; - } - } - for (auto const& item: b.reference_filter_results) { - if (result.reference_filter_results.count(item.first) == 0) { - result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)]; - } - } - - while (true) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) { - result.count = out - result.docs; - return; - } - } - while (*A > *B) { - if (++B == endB) { - result.count = out - result.docs; - return; - } - } - if (*A == *B) { - *out = *A; - - // Copy the references of the document from every collection into result. 
- for (auto const& item: a.reference_filter_results) { - result.reference_filter_results[item.first][out - result.docs] = item.second[A - a.docs]; - } - for (auto const& item: b.reference_filter_results) { - result.reference_filter_results[item.first][out - result.docs] = item.second[B - b.docs]; - } - - out++; - - if (++A == endA || ++B == endB) { - result.count = out - result.docs; - return; - } - } else { - goto SKIP_FIRST_COMPARE; - } - } -} - -void filter_result_t::or_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) { - if (a.count == 0 && b.count == 0) { - return; - } - - // If either one of a or b does not have any matches, copy other into result. - if (a.count == 0) { - result = b; - return; - } - if (b.count == 0) { - result = a; - return; - } - - size_t indexA = 0, indexB = 0, res_index = 0, lenA = a.count, lenB = b.count; - result.docs = new uint32_t[lenA + lenB]; - - // Add an entry of references in the result for each unique collection in a and b. - for (auto const& item: a.reference_filter_results) { - if (result.reference_filter_results.count(item.first) == 0) { - result.reference_filter_results[item.first] = new reference_filter_result_t[lenA + lenB]; - } - } - for (auto const& item: b.reference_filter_results) { - if (result.reference_filter_results.count(item.first) == 0) { - result.reference_filter_results[item.first] = new reference_filter_result_t[lenA + lenB]; - } - } - - while (indexA < lenA && indexB < lenB) { - if (a.docs[indexA] < b.docs[indexB]) { - // check for duplicate - if (res_index == 0 || result.docs[res_index - 1] != a.docs[indexA]) { - result.docs[res_index] = a.docs[indexA]; - res_index++; - } - - // Copy references of the last result document from every collection in a. 
- for (auto const& item: a.reference_filter_results) { - result.reference_filter_results[item.first][res_index - 1] = item.second[indexA]; - } - - indexA++; - } else { - if (res_index == 0 || result.docs[res_index - 1] != b.docs[indexB]) { - result.docs[res_index] = b.docs[indexB]; - res_index++; - } - - for (auto const& item: b.reference_filter_results) { - result.reference_filter_results[item.first][res_index - 1] = item.second[indexB]; - } - - indexB++; - } - } - - while (indexA < lenA) { - if (res_index == 0 || result.docs[res_index - 1] != a.docs[indexA]) { - result.docs[res_index] = a.docs[indexA]; - res_index++; - } - - for (auto const& item: a.reference_filter_results) { - result.reference_filter_results[item.first][res_index - 1] = item.second[indexA]; - } - - indexA++; - } - - while (indexB < lenB) { - if(res_index == 0 || result.docs[res_index - 1] != b.docs[indexB]) { - result.docs[res_index] = b.docs[indexB]; - res_index++; - } - - for (auto const& item: b.reference_filter_results) { - result.reference_filter_results[item.first][res_index - 1] = item.second[indexB]; - } - - indexB++; - } - - result.count = res_index; - - // shrink fit - auto out = new uint32_t[res_index]; - memcpy(out, result.docs, res_index * sizeof(uint32_t)); - delete[] result.docs; - result.docs = out; - - for (auto &item: result.reference_filter_results) { - auto out_references = new reference_filter_result_t[res_index]; - - for (uint32_t i = 0; i < result.count; i++) { - out_references[i] = item.second[i]; - } - delete[] item.second; - item.second = out_references; - } -} diff --git a/src/index.cpp b/src/index.cpp index a5a1c899..add2c363 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -21,7 +21,6 @@ #include #include #include "logger.h" -#include #include "validator.h" #define RETURN_CIRCUIT_BREAKER if((std::chrono::duration_cast( \ diff --git a/test/collection_join_test.cpp b/test/collection_join_test.cpp index 1d4302ea..0d1cd5b5 100644 --- a/test/collection_join_test.cpp +++ 
b/test/collection_join_test.cpp
@@ -1430,3 +1430,141 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference_SingleMatch) {
     ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
     ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
 }
+
+// Verifies cascade deletion: removing a Products document must also remove every
+// Customers document whose "product_id" reference field points at it.
+TEST_F(CollectionJoinTest, CascadeDeletion) {
+    auto schema_json =
+            R"({
+                "name": "Products",
+                "fields": [
+                    {"name": "product_id", "type": "string"},
+                    {"name": "product_name", "type": "string", "infix": true},
+                    {"name": "product_description", "type": "string"}
+                ]
+            })"_json;
+    std::vector<nlohmann::json> documents = {
+            R"({
+                "product_id": "product_a",
+                "product_name": "shampoo",
+                "product_description": "Our new moisturizing shampoo is perfect for those with dry or damaged hair."
+            })"_json,
+            R"({
+                "product_id": "product_b",
+                "product_name": "soap",
+                "product_description": "Introducing our all-natural, organic soap bar made with essential oils and botanical ingredients."
+            })"_json
+    };
+
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    // Customers.product_id references Products.product_id; each customer has one row per product.
+    schema_json =
+            R"({
+                "name": "Customers",
+                "fields": [
+                    {"name": "customer_id", "type": "string"},
+                    {"name": "customer_name", "type": "string"},
+                    {"name": "product_price", "type": "float"},
+                    {"name": "product_id", "type": "string", "reference": "Products.product_id"}
+                ]
+            })"_json;
+    documents = {
+            R"({
+                "customer_id": "customer_a",
+                "customer_name": "Joe",
+                "product_price": 143,
+                "product_id": "product_a"
+            })"_json,
+            R"({
+                "customer_id": "customer_a",
+                "customer_name": "Joe",
+                "product_price": 73.5,
+                "product_id": "product_b"
+            })"_json,
+            R"({
+                "customer_id": "customer_b",
+                "customer_name": "Dan",
+                "product_price": 75,
+                "product_id": "product_a"
+            })"_json,
+            R"({
+                "customer_id": "customer_b",
+                "customer_name": "Dan",
+                "product_price": 140,
+                "product_id": "product_b"
+            })"_json
+    };
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    std::map<std::string, std::string> req_params = {
+            {"collection", "Customers"},
+            {"q", "*"},
+    };
+    nlohmann::json embedded_params;
+    std::string json_res;
+    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::system_clock::now().time_since_epoch()).count();
+
+    // Before the delete: all four Customers documents must be searchable.
+    auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_TRUE(search_op.ok());
+
+    nlohmann::json res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(4, res_obj["found"].get<size_t>());
+    // NOTE(review): hits appear to come back newest-first, so the inserted a, b, a, b order reads b, a, b, a.
+    ASSERT_EQ("product_b", res_obj["hits"][0]["document"].at("product_id"));
+    ASSERT_EQ("product_a", res_obj["hits"][1]["document"].at("product_id"));
+    ASSERT_EQ("product_b", res_obj["hits"][2]["document"].at("product_id"));
+    ASSERT_EQ("product_a", res_obj["hits"][3]["document"].at("product_id"));
+
+    req_params = {
+            {"collection", "Products"},
+            {"q", "*"},
+    };
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_TRUE(search_op.ok());
+
+    res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(2, res_obj["found"].get<size_t>());
+    ASSERT_EQ("product_b", res_obj["hits"][0]["document"].at("product_id"));
+    ASSERT_EQ("product_a", res_obj["hits"][1]["document"].at("product_id"));
+
+    // NOTE(review): "0" is presumably the auto-assigned id of the first Products document (product_a) - confirm.
+    collectionManager.get_collection_unsafe("Products")->remove("0");
+
+    // After the delete: only product_b is left in Products ...
+    req_params = {
+            {"collection", "Products"},
+            {"q", "*"},
+    };
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_TRUE(search_op.ok());
+
+    res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(1, res_obj["found"].get<size_t>());
+    ASSERT_EQ("product_b", res_obj["hits"][0]["document"].at("product_id"));
+
+    // ... and the two Customers documents that referenced product_a must be gone as well.
+    req_params = {
+            {"collection", "Customers"},
+            {"q", "*"},
+    };
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_TRUE(search_op.ok());
+
+    res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(2, res_obj["found"].get<size_t>());
+    ASSERT_EQ("product_b", res_obj["hits"][0]["document"].at("product_id"));
+    ASSERT_EQ("product_b", res_obj["hits"][1]["document"].at("product_id"));
+}