Merge branch 'v0.26-facets' into nested_facet_parent

2025-05-21 14:12:27 +08:00 · 2023-07-26 14:01:27 +05:30 · 2023-07-26 14:01:27 +05:30 · 67fe400065
commit 67fe400065
parent 6fa848c6a2 161cf1edec
5 changed files with 157 additions and 168 deletions
--- a/include/collection.h
+++ b/include/collection.h
@ -133,6 +133,11 @@ private:
    /// "field name" -> reference_pair(referenced_collection_name, referenced_field_name)
    spp::sparse_hash_map<std::string, reference_pair> reference_fields;

+    /// Records which other collections reference the current collection.
+    /// Useful to perform operations such as cascading delete.
+    /// referencing collection name -> name of its reference helper field
+    spp::sparse_hash_map<std::string, std::string> referenced_in;
+
    // Keep index as the last field since it is initialized in the constructor via init_index(). Add a new field before it.
    Index* index;

--- a/src/collection.cpp
+++ b/src/collection.cpp
@ -3430,6 +3430,23 @@ void Collection::remove_document(const nlohmann::json & document, const uint32_t
        store->remove(get_doc_id_key(id));
        store->remove(get_seq_id_key(seq_id));
    }
+
+    if (referenced_in.empty()) {
+        return;
+    }
+
+    CollectionManager& collectionManager = CollectionManager::get_instance();
+    // Cascade delete all the references.
+    for (const auto &item: referenced_in) {
+        auto ref_coll = collectionManager.get_collection(item.first);
+        if (ref_coll != nullptr) {
+            filter_result_t filter_result;
+            ref_coll->get_filter_ids(item.second + ":=" + id, filter_result);
+            for (uint32_t i = 0; i < filter_result.count; i++) {
+                ref_coll->remove(std::to_string(filter_result.docs[i]));
+            }
+        }
+    }
 }

 Option<std::string> Collection::remove(const std::string & id, const bool remove_from_store) {
@ -4691,10 +4708,17 @@ Index* Collection::init_index() {

        if(!field.reference.empty()) {
            auto dot_index = field.reference.find('.');
-            auto collection_name = field.reference.substr(0, dot_index);
-            auto field_name = field.reference.substr(dot_index + 1);
+            auto ref_coll_name = field.reference.substr(0, dot_index);
+            auto ref_field_name = field.reference.substr(dot_index + 1);

-            reference_fields.emplace(field.name, reference_pair(collection_name, field_name));
+            reference_fields.emplace(field.name, reference_pair(ref_coll_name, ref_field_name));
+
+            auto& collectionManager = CollectionManager::get_instance();
+            auto ref_coll = collectionManager.get_collection(ref_coll_name);
+            if (ref_coll != nullptr) {
+                // Passing reference helper field helps perform operation on doc_id instead of field value.
+                ref_coll->referenced_in.emplace(name, field.name + REFERENCE_HELPER_FIELD_SUFFIX);
+            }
        }
    }

--- a/src/field.cpp
+++ b/src/field.cpp
@ -642,167 +642,3 @@ Option<bool> field::validate_and_init_embed_fields(const std::vector<std::pair<s

    return Option<bool>(true);
 }
-
-void filter_result_t::and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) {
-    auto lenA = a.count, lenB = b.count;
-    if (lenA == 0 || lenB == 0) {
-        return;
-    }
-
-    result.docs = new uint32_t[std::min(lenA, lenB)];
-
-    auto A = a.docs, B = b.docs, out = result.docs;
-    const uint32_t *endA = A + lenA;
-    const uint32_t *endB = B + lenB;
-
-    // Add an entry of references in the result for each unique collection in a and b.
-    for (auto const& item: a.reference_filter_results) {
-        if (result.reference_filter_results.count(item.first) == 0) {
-            result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
-        }
-    }
-    for (auto const& item: b.reference_filter_results) {
-        if (result.reference_filter_results.count(item.first) == 0) {
-            result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
-        }
-    }
-
-    while (true) {
-        while (*A < *B) {
-            SKIP_FIRST_COMPARE:
-            if (++A == endA) {
-                result.count = out - result.docs;
-                return;
-            }
-        }
-        while (*A > *B) {
-            if (++B == endB) {
-                result.count = out - result.docs;
-                return;
-            }
-        }
-        if (*A == *B) {
-            *out = *A;
-
-            // Copy the references of the document from every collection into result.
-            for (auto const& item: a.reference_filter_results) {
-                result.reference_filter_results[item.first][out - result.docs] = item.second[A - a.docs];
-            }
-            for (auto const& item: b.reference_filter_results) {
-                result.reference_filter_results[item.first][out - result.docs] = item.second[B - b.docs];
-            }
-
-            out++;
-
-            if (++A == endA || ++B == endB) {
-                result.count = out - result.docs;
-                return;
-            }
-        } else {
-            goto SKIP_FIRST_COMPARE;
-        }
-    }
-}
-
-void filter_result_t::or_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) {
-    if (a.count == 0 && b.count == 0) {
-        return;
-    }
-
-    // If either one of a or b does not have any matches, copy other into result.
-    if (a.count == 0) {
-        result = b;
-        return;
-    }
-    if (b.count == 0) {
-        result = a;
-        return;
-    }
-
-    size_t indexA = 0, indexB = 0, res_index = 0, lenA = a.count, lenB = b.count;
-    result.docs = new uint32_t[lenA + lenB];
-
-    // Add an entry of references in the result for each unique collection in a and b.
-    for (auto const& item: a.reference_filter_results) {
-        if (result.reference_filter_results.count(item.first) == 0) {
-            result.reference_filter_results[item.first] = new reference_filter_result_t[lenA + lenB];
-        }
-    }
-    for (auto const& item: b.reference_filter_results) {
-        if (result.reference_filter_results.count(item.first) == 0) {
-            result.reference_filter_results[item.first] = new reference_filter_result_t[lenA + lenB];
-        }
-    }
-
-    while (indexA < lenA && indexB < lenB) {
-        if (a.docs[indexA] < b.docs[indexB]) {
-            // check for duplicate
-            if (res_index == 0 || result.docs[res_index - 1] != a.docs[indexA]) {
-                result.docs[res_index] = a.docs[indexA];
-                res_index++;
-            }
-
-            // Copy references of the last result document from every collection in a.
-            for (auto const& item: a.reference_filter_results) {
-                result.reference_filter_results[item.first][res_index - 1] = item.second[indexA];
-            }
-
-            indexA++;
-        } else {
-            if (res_index == 0 || result.docs[res_index - 1] != b.docs[indexB]) {
-                result.docs[res_index] = b.docs[indexB];
-                res_index++;
-            }
-
-            for (auto const& item: b.reference_filter_results) {
-                result.reference_filter_results[item.first][res_index - 1] = item.second[indexB];
-            }
-
-            indexB++;
-        }
-    }
-
-    while (indexA < lenA) {
-        if (res_index == 0 || result.docs[res_index - 1] != a.docs[indexA]) {
-            result.docs[res_index] = a.docs[indexA];
-            res_index++;
-        }
-
-        for (auto const& item: a.reference_filter_results) {
-            result.reference_filter_results[item.first][res_index - 1] = item.second[indexA];
-        }
-
-        indexA++;
-    }
-
-    while (indexB < lenB) {
-        if(res_index == 0 || result.docs[res_index - 1] != b.docs[indexB]) {
-            result.docs[res_index] = b.docs[indexB];
-            res_index++;
-        }
-
-        for (auto const& item: b.reference_filter_results) {
-            result.reference_filter_results[item.first][res_index - 1] = item.second[indexB];
-        }
-
-        indexB++;
-    }
-
-    result.count = res_index;
-
-    // shrink fit
-    auto out = new uint32_t[res_index];
-    memcpy(out, result.docs, res_index * sizeof(uint32_t));
-    delete[] result.docs;
-    result.docs = out;
-
-    for (auto &item: result.reference_filter_results) {
-        auto out_references = new reference_filter_result_t[res_index];
-
-        for (uint32_t i = 0; i < result.count; i++) {
-            out_references[i] = item.second[i];
-        }
-        delete[] item.second;
-        item.second = out_references;
-    }
-}
--- a/src/index.cpp
+++ b/src/index.cpp
@ -21,7 +21,6 @@
 #include <or_iterator.h>
 #include <timsort.hpp>
 #include "logger.h"
-#include <collection_manager.h>
 #include "validator.h"

 #define RETURN_CIRCUIT_BREAKER if((std::chrono::duration_cast<std::chrono::microseconds>( \
--- a/test/collection_join_test.cpp
+++ b/test/collection_join_test.cpp
@ -1430,3 +1430,128 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference_SingleMatch) {
    ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
    ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
 }
+
+TEST_F(CollectionJoinTest, CascadeDeletion) {  // Removing a Products doc must also remove the Customers docs referencing it.
+    auto schema_json =
+            R"({
+                "name": "Products",
+                "fields": [
+                    {"name": "product_id", "type": "string"},
+                    {"name": "product_name", "type": "string", "infix": true},
+                    {"name": "product_description", "type": "string"}
+                ]
+            })"_json;
+    std::vector<nlohmann::json> documents = {
+            R"({
+                "product_id": "product_a",
+                "product_name": "shampoo",
+                "product_description": "Our new moisturizing shampoo is perfect for those with dry or damaged hair."
+            })"_json,
+            R"({
+                "product_id": "product_b",
+                "product_name": "soap",
+                "product_description": "Introducing our all-natural, organic soap bar made with essential oils and botanical ingredients."
+            })"_json
+    };
+    // Create the referenced collection; the test later relies on product_a being the doc with id "0".
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        ASSERT_TRUE(add_op.ok());
+    }
+    // Customers.product_id references Products.product_id; each customer references both products.
+    schema_json =
+            R"({
+                "name": "Customers",
+                "fields": [
+                    {"name": "customer_id", "type": "string"},
+                    {"name": "customer_name", "type": "string"},
+                    {"name": "product_price", "type": "float"},
+                    {"name": "product_id", "type": "string", "reference": "Products.product_id"}
+                ]
+            })"_json;
+    documents = {
+            R"({
+                "customer_id": "customer_a",
+                "customer_name": "Joe",
+                "product_price": 143,
+                "product_id": "product_a"
+            })"_json,
+            R"({
+                "customer_id": "customer_a",
+                "customer_name": "Joe",
+                "product_price": 73.5,
+                "product_id": "product_b"
+            })"_json,
+            R"({
+                "customer_id": "customer_b",
+                "customer_name": "Dan",
+                "product_price": 75,
+                "product_id": "product_a"
+            })"_json,
+            R"({
+                "customer_id": "customer_b",
+                "customer_name": "Dan",
+                "product_price": 140,
+                "product_id": "product_b"
+            })"_json
+    };
+    collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+    for (auto const &json: documents) {
+        auto add_op = collection_create_op.get()->add(json.dump());
+        ASSERT_TRUE(add_op.ok());
+    }
+    std::map<std::string, std::string> req_params = {
+            {"collection", "Customers"},
+            {"q", "*"},
+    };
+    nlohmann::json embedded_params;
+    std::string json_res;
+    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::system_clock::now().time_since_epoch()).count();
+    // Baseline: all four Customers docs are present before any deletion.
+    auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    // Hits are expected in reverse insertion order (b, a, b, a); check every one of the four hits.
+    nlohmann::json res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(4, res_obj["found"].get<size_t>());
+    ASSERT_EQ("product_b", res_obj["hits"][0]["document"].at("product_id"));
+    ASSERT_EQ("product_a", res_obj["hits"][1]["document"].at("product_id"));
+    ASSERT_EQ("product_b", res_obj["hits"][2]["document"].at("product_id"));
+    ASSERT_EQ("product_a", res_obj["hits"][3]["document"].at("product_id"));
+    // Baseline: both Products docs are present before the deletion.
+    req_params = {
+            {"collection", "Products"},
+            {"q", "*"},
+    };
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+
+    res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(2, res_obj["found"].get<size_t>());
+    ASSERT_EQ("product_b", res_obj["hits"][0]["document"].at("product_id"));
+    ASSERT_EQ("product_a", res_obj["hits"][1]["document"].at("product_id"));
+    // Remove product_a (doc id "0"); the delete is expected to cascade into Customers.
+    collectionManager.get_collection_unsafe("Products")->remove("0");
+
+    req_params = {
+            {"collection", "Products"},
+            {"q", "*"},
+    };
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+
+    res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(1, res_obj["found"].get<size_t>());
+    ASSERT_EQ("product_b", res_obj["hits"][0]["document"].at("product_id"));
+    // Only the Customers docs that reference product_b should remain after the cascade.
+    req_params = {
+            {"collection", "Customers"},
+            {"q", "*"},
+    };
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+
+    res_obj = nlohmann::json::parse(json_res);
+    ASSERT_EQ(2, res_obj["found"].get<size_t>());
+    ASSERT_EQ("product_b", res_obj["hits"][0]["document"].at("product_id"));
+    ASSERT_EQ("product_b", res_obj["hits"][1]["document"].at("product_id"));
+}