Merge branch 'v0.26-facets' into nested_facet_parent

This commit is contained in:
krunal 2023-07-26 14:01:27 +05:30
commit 67fe400065
5 changed files with 157 additions and 168 deletions

View File

@ -133,6 +133,11 @@ private:
/// "field name" -> reference_pair(referenced_collection_name, referenced_field_name)
spp::sparse_hash_map<std::string, reference_pair> reference_fields;
/// Tracks the collections that reference this collection.
/// Used to perform operations such as cascading deletes.
/// referencing collection name -> name of the reference helper field in that collection
spp::sparse_hash_map<std::string, std::string> referenced_in;
// Keep index as the last field since it is initialized in the constructor via init_index(). Add a new field before it.
Index* index;

View File

@ -3430,6 +3430,23 @@ void Collection::remove_document(const nlohmann::json & document, const uint32_t
store->remove(get_doc_id_key(id));
store->remove(get_seq_id_key(seq_id));
}
if (referenced_in.empty()) {
return;
}
CollectionManager& collectionManager = CollectionManager::get_instance();
// Cascade delete all the references.
for (const auto &item: referenced_in) {
auto ref_coll = collectionManager.get_collection(item.first);
if (ref_coll != nullptr) {
filter_result_t filter_result;
ref_coll->get_filter_ids(item.second + ":=" + id, filter_result);
for (uint32_t i = 0; i < filter_result.count; i++) {
ref_coll->remove(std::to_string(filter_result.docs[i]));
}
}
}
}
Option<std::string> Collection::remove(const std::string & id, const bool remove_from_store) {
@ -4691,10 +4708,17 @@ Index* Collection::init_index() {
if(!field.reference.empty()) {
auto dot_index = field.reference.find('.');
auto collection_name = field.reference.substr(0, dot_index);
auto field_name = field.reference.substr(dot_index + 1);
auto ref_coll_name = field.reference.substr(0, dot_index);
auto ref_field_name = field.reference.substr(dot_index + 1);
reference_fields.emplace(field.name, reference_pair(collection_name, field_name));
reference_fields.emplace(field.name, reference_pair(ref_coll_name, ref_field_name));
auto& collectionManager = CollectionManager::get_instance();
auto ref_coll = collectionManager.get_collection(ref_coll_name);
if (ref_coll != nullptr) {
// Passing reference helper field helps perform operation on doc_id instead of field value.
ref_coll->referenced_in.emplace(name, field.name + REFERENCE_HELPER_FIELD_SUFFIX);
}
}
}

View File

@ -642,167 +642,3 @@ Option<bool> field::validate_and_init_embed_fields(const std::vector<std::pair<s
return Option<bool>(true);
}
void filter_result_t::and_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) {
auto lenA = a.count, lenB = b.count;
if (lenA == 0 || lenB == 0) {
return;
}
result.docs = new uint32_t[std::min(lenA, lenB)];
auto A = a.docs, B = b.docs, out = result.docs;
const uint32_t *endA = A + lenA;
const uint32_t *endB = B + lenB;
// Add an entry of references in the result for each unique collection in a and b.
for (auto const& item: a.reference_filter_results) {
if (result.reference_filter_results.count(item.first) == 0) {
result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
}
}
for (auto const& item: b.reference_filter_results) {
if (result.reference_filter_results.count(item.first) == 0) {
result.reference_filter_results[item.first] = new reference_filter_result_t[std::min(lenA, lenB)];
}
}
while (true) {
while (*A < *B) {
SKIP_FIRST_COMPARE:
if (++A == endA) {
result.count = out - result.docs;
return;
}
}
while (*A > *B) {
if (++B == endB) {
result.count = out - result.docs;
return;
}
}
if (*A == *B) {
*out = *A;
// Copy the references of the document from every collection into result.
for (auto const& item: a.reference_filter_results) {
result.reference_filter_results[item.first][out - result.docs] = item.second[A - a.docs];
}
for (auto const& item: b.reference_filter_results) {
result.reference_filter_results[item.first][out - result.docs] = item.second[B - b.docs];
}
out++;
if (++A == endA || ++B == endB) {
result.count = out - result.docs;
return;
}
} else {
goto SKIP_FIRST_COMPARE;
}
}
}
void filter_result_t::or_filter_results(const filter_result_t& a, const filter_result_t& b, filter_result_t& result) {
if (a.count == 0 && b.count == 0) {
return;
}
// If either one of a or b does not have any matches, copy other into result.
if (a.count == 0) {
result = b;
return;
}
if (b.count == 0) {
result = a;
return;
}
size_t indexA = 0, indexB = 0, res_index = 0, lenA = a.count, lenB = b.count;
result.docs = new uint32_t[lenA + lenB];
// Add an entry of references in the result for each unique collection in a and b.
for (auto const& item: a.reference_filter_results) {
if (result.reference_filter_results.count(item.first) == 0) {
result.reference_filter_results[item.first] = new reference_filter_result_t[lenA + lenB];
}
}
for (auto const& item: b.reference_filter_results) {
if (result.reference_filter_results.count(item.first) == 0) {
result.reference_filter_results[item.first] = new reference_filter_result_t[lenA + lenB];
}
}
while (indexA < lenA && indexB < lenB) {
if (a.docs[indexA] < b.docs[indexB]) {
// check for duplicate
if (res_index == 0 || result.docs[res_index - 1] != a.docs[indexA]) {
result.docs[res_index] = a.docs[indexA];
res_index++;
}
// Copy references of the last result document from every collection in a.
for (auto const& item: a.reference_filter_results) {
result.reference_filter_results[item.first][res_index - 1] = item.second[indexA];
}
indexA++;
} else {
if (res_index == 0 || result.docs[res_index - 1] != b.docs[indexB]) {
result.docs[res_index] = b.docs[indexB];
res_index++;
}
for (auto const& item: b.reference_filter_results) {
result.reference_filter_results[item.first][res_index - 1] = item.second[indexB];
}
indexB++;
}
}
while (indexA < lenA) {
if (res_index == 0 || result.docs[res_index - 1] != a.docs[indexA]) {
result.docs[res_index] = a.docs[indexA];
res_index++;
}
for (auto const& item: a.reference_filter_results) {
result.reference_filter_results[item.first][res_index - 1] = item.second[indexA];
}
indexA++;
}
while (indexB < lenB) {
if(res_index == 0 || result.docs[res_index - 1] != b.docs[indexB]) {
result.docs[res_index] = b.docs[indexB];
res_index++;
}
for (auto const& item: b.reference_filter_results) {
result.reference_filter_results[item.first][res_index - 1] = item.second[indexB];
}
indexB++;
}
result.count = res_index;
// shrink fit
auto out = new uint32_t[res_index];
memcpy(out, result.docs, res_index * sizeof(uint32_t));
delete[] result.docs;
result.docs = out;
for (auto &item: result.reference_filter_results) {
auto out_references = new reference_filter_result_t[res_index];
for (uint32_t i = 0; i < result.count; i++) {
out_references[i] = item.second[i];
}
delete[] item.second;
item.second = out_references;
}
}

View File

@ -21,7 +21,6 @@
#include <or_iterator.h>
#include <timsort.hpp>
#include "logger.h"
#include <collection_manager.h>
#include "validator.h"
#define RETURN_CIRCUIT_BREAKER if((std::chrono::duration_cast<std::chrono::microseconds>( \

View File

@ -1430,3 +1430,128 @@ TEST_F(CollectionJoinTest, IncludeExcludeFieldsByReference_SingleMatch) {
ASSERT_EQ(1, res_obj["hits"][0]["document"].count("product_price"));
ASSERT_EQ(73.5, res_obj["hits"][0]["document"].at("product_price"));
}
// Verifies that removing a document from a referenced collection (Products) also removes the documents
// of the referencing collection (Customers) that point at it.
TEST_F(CollectionJoinTest, CascadeDeletion) {
    auto schema_json =
            R"({
                "name": "Products",
                "fields": [
                    {"name": "product_id", "type": "string"},
                    {"name": "product_name", "type": "string", "infix": true},
                    {"name": "product_description", "type": "string"}
                ]
            })"_json;
    std::vector<nlohmann::json> documents = {
            R"({
                "product_id": "product_a",
                "product_name": "shampoo",
                "product_description": "Our new moisturizing shampoo is perfect for those with dry or damaged hair."
            })"_json,
            R"({
                "product_id": "product_b",
                "product_name": "soap",
                "product_description": "Introducing our all-natural, organic soap bar made with essential oils and botanical ingredients."
            })"_json
    };

    auto collection_create_op = collectionManager.create_collection(schema_json);
    ASSERT_TRUE(collection_create_op.ok());
    for (auto const &json: documents) {
        auto add_op = collection_create_op.get()->add(json.dump());
        ASSERT_TRUE(add_op.ok());
    }

    // Customers.product_id references Products.product_id.
    schema_json =
            R"({
                "name": "Customers",
                "fields": [
                    {"name": "customer_id", "type": "string"},
                    {"name": "customer_name", "type": "string"},
                    {"name": "product_price", "type": "float"},
                    {"name": "product_id", "type": "string", "reference": "Products.product_id"}
                ]
            })"_json;
    documents = {
            R"({
                "customer_id": "customer_a",
                "customer_name": "Joe",
                "product_price": 143,
                "product_id": "product_a"
            })"_json,
            R"({
                "customer_id": "customer_a",
                "customer_name": "Joe",
                "product_price": 73.5,
                "product_id": "product_b"
            })"_json,
            R"({
                "customer_id": "customer_b",
                "customer_name": "Dan",
                "product_price": 75,
                "product_id": "product_a"
            })"_json,
            R"({
                "customer_id": "customer_b",
                "customer_name": "Dan",
                "product_price": 140,
                "product_id": "product_b"
            })"_json
    };

    collection_create_op = collectionManager.create_collection(schema_json);
    ASSERT_TRUE(collection_create_op.ok());
    for (auto const &json: documents) {
        auto add_op = collection_create_op.get()->add(json.dump());
        ASSERT_TRUE(add_op.ok());
    }

    // Before the deletion: all four customer documents are present.
    std::map<std::string, std::string> req_params = {
            {"collection", "Customers"},
            {"q", "*"},
    };
    nlohmann::json embedded_params;
    std::string json_res;
    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::system_clock::now().time_since_epoch()).count();

    auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
    ASSERT_TRUE(search_op.ok());

    nlohmann::json res_obj = nlohmann::json::parse(json_res);
    ASSERT_EQ(4, res_obj["found"].get<size_t>());
    // Check every hit; the documents alternate between the two products.
    ASSERT_EQ("product_b", res_obj["hits"][0]["document"].at("product_id"));
    ASSERT_EQ("product_a", res_obj["hits"][1]["document"].at("product_id"));
    ASSERT_EQ("product_b", res_obj["hits"][2]["document"].at("product_id"));
    ASSERT_EQ("product_a", res_obj["hits"][3]["document"].at("product_id"));

    // Before the deletion: both products are present.
    req_params = {
            {"collection", "Products"},
            {"q", "*"},
    };
    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
    ASSERT_TRUE(search_op.ok());

    res_obj = nlohmann::json::parse(json_res);
    ASSERT_EQ(2, res_obj["found"].get<size_t>());
    ASSERT_EQ("product_b", res_obj["hits"][0]["document"].at("product_id"));
    ASSERT_EQ("product_a", res_obj["hits"][1]["document"].at("product_id"));

    // Remove product_a (document id "0"); the deletion must cascade into Customers.
    auto remove_op = collectionManager.get_collection_unsafe("Products")->remove("0");
    ASSERT_TRUE(remove_op.ok());

    req_params = {
            {"collection", "Products"},
            {"q", "*"},
    };
    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
    ASSERT_TRUE(search_op.ok());

    res_obj = nlohmann::json::parse(json_res);
    ASSERT_EQ(1, res_obj["found"].get<size_t>());
    ASSERT_EQ("product_b", res_obj["hits"][0]["document"].at("product_id"));

    // Only the customer documents that reference product_b may remain.
    req_params = {
            {"collection", "Customers"},
            {"q", "*"},
    };
    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
    ASSERT_TRUE(search_op.ok());

    res_obj = nlohmann::json::parse(json_res);
    ASSERT_EQ(2, res_obj["found"].get<size_t>());
    ASSERT_EQ("product_b", res_obj["hits"][0]["document"].at("product_id"));
    ASSERT_EQ("product_b", res_obj["hits"][1]["document"].at("product_id"));
}