Merge branch 'v0.25-join' into v0.26-facets

2025-05-19 13:12:22 +08:00 · 2023-08-08 15:57:24 +05:30 · 2023-08-08 15:57:24 +05:30 · 64c12c4cb2
commit 64c12c4cb2
parent b779713fdb 10c1f4c5c1
3 changed files with 169 additions and 16 deletions
--- a/include/field.h
+++ b/include/field.h
@ -23,6 +23,7 @@ namespace field_types {
    static const std::string INT64 = "int64";
    static const std::string FLOAT = "float";
    static const std::string BOOL = "bool";
+    static const std::string NIL = "nil";
    static const std::string GEOPOINT = "geopoint";
    static const std::string STRING_ARRAY = "string[]";
    static const std::string INT32_ARRAY = "int32[]";
@ -434,19 +435,19 @@ struct field {
                                                       std::vector<field>& fields_vec);

    static bool flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_array, bool has_obj_array,
-                            const field& the_field, const std::string& flat_name,
+                            bool is_update, const field& the_field, const std::string& flat_name,
                            const std::unordered_map<std::string, field>& dyn_fields,
                            std::unordered_map<std::string, field>& flattened_fields);

    static Option<bool> flatten_field(nlohmann::json& doc, nlohmann::json& obj, const field& the_field,
                                      std::vector<std::string>& path_parts, size_t path_index, bool has_array,
-                                      bool has_obj_array,
+                                      bool has_obj_array, bool is_update,
                                      const std::unordered_map<std::string, field>& dyn_fields,
                                      std::unordered_map<std::string, field>& flattened_fields);

    static Option<bool> flatten_doc(nlohmann::json& document, const tsl::htrie_map<char, field>& nested_fields,
                                    const std::unordered_map<std::string, field>& dyn_fields,
-                                    bool missing_is_ok, std::vector<field>& flattened_fields);
+                                    bool is_update, std::vector<field>& flattened_fields);

    static void compact_nested_fields(tsl::htrie_map<char, field>& nested_fields);
 };
--- a/src/field.cpp
+++ b/src/field.cpp
@ -337,18 +337,41 @@ Option<bool> field::json_field_to_field(bool enable_nested_fields, nlohmann::jso
 }

 bool field::flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_array, bool has_obj_array,
-                        const field& the_field, const std::string& flat_name,
+                        bool is_update, const field& the_field, const std::string& flat_name,
                        const std::unordered_map<std::string, field>& dyn_fields,
                        std::unordered_map<std::string, field>& flattened_fields) {
    if(value.is_object()) {
        has_obj_array = has_array;
-        for(const auto& kv: value.items()) {
-            flatten_obj(doc, kv.value(), has_array, has_obj_array, the_field, flat_name + "." + kv.key(),
-                        dyn_fields, flattened_fields);
+        auto it = value.begin();
+        while(it != value.end()) {
+            const std::string& child_field_name = flat_name + "." + it.key();
+            if(it.value().is_null()) {
+                if(has_array) {
+                    doc[child_field_name].push_back(nullptr);
+                } else {
+                    doc[child_field_name] = nullptr;
+                }
+
+                field flattened_field;
+                flattened_field.name = child_field_name;
+                flattened_field.type = field_types::NIL;
+                flattened_fields[child_field_name] = flattened_field;
+
+                if(!is_update) {
+                    // not an update: drop the null here (the update code path needs nulls kept and handles them)
+                    it = value.erase(it);
+                } else {
+                    it++;
+                }
+            } else {
+                flatten_obj(doc, it.value(), has_array, has_obj_array, is_update, the_field, child_field_name,
+                            dyn_fields, flattened_fields);
+                it++;
+            }
        }
    } else if(value.is_array()) {
        for(const auto& kv: value.items()) {
-            flatten_obj(doc, kv.value(), true, has_obj_array, the_field, flat_name, dyn_fields, flattened_fields);
+            flatten_obj(doc, kv.value(), true, has_obj_array, is_update, the_field, flat_name, dyn_fields, flattened_fields);
        }
    } else { // must be a primitive
        if(doc.count(flat_name) != 0 && flattened_fields.find(flat_name) == flattened_fields.end()) {
@ -404,7 +427,7 @@ bool field::flatten_obj(nlohmann::json& doc, nlohmann::json& value, bool has_arr

 Option<bool> field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, const field& the_field,
                                  std::vector<std::string>& path_parts, size_t path_index,
-                                  bool has_array, bool has_obj_array,
+                                  bool has_array, bool has_obj_array, bool is_update,
                                  const std::unordered_map<std::string, field>& dyn_fields,
                                  std::unordered_map<std::string, field>& flattened_fields) {
    if(path_index == path_parts.size()) {
@ -459,7 +482,8 @@ Option<bool> field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, cons

        if(detected_type == the_field.type || is_numericaly_valid) {
            if(the_field.is_object()) {
-                flatten_obj(doc, obj, has_array, has_obj_array, the_field, the_field.name, dyn_fields, flattened_fields);
+                flatten_obj(doc, obj, has_array, has_obj_array, is_update, the_field, the_field.name,
+                            dyn_fields, flattened_fields);
            } else {
                if(doc.count(the_field.name) != 0 && flattened_fields.find(the_field.name) == flattened_fields.end()) {
                    return Option<bool>(true);
@ -502,7 +526,7 @@ Option<bool> field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, cons
            for(auto& ele: it.value()) {
                has_obj_array = has_obj_array || ele.is_object();
                Option<bool> op = flatten_field(doc, ele, the_field, path_parts, path_index + 1, has_array,
-                                                has_obj_array, dyn_fields, flattened_fields);
+                                                has_obj_array, is_update, dyn_fields, flattened_fields);
                if(!op.ok()) {
                    return op;
                }
@ -510,7 +534,7 @@ Option<bool> field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, cons
            return Option<bool>(true);
        } else {
            return flatten_field(doc, it.value(), the_field, path_parts, path_index + 1, has_array, has_obj_array,
-                                 dyn_fields, flattened_fields);
+                                 is_update, dyn_fields, flattened_fields);
        }
    } {
        return Option<bool>(404, "Field `" + the_field.name + "` not found.");
@ -520,7 +544,7 @@ Option<bool> field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, cons
 Option<bool> field::flatten_doc(nlohmann::json& document,
                                const tsl::htrie_map<char, field>& nested_fields,
                                const std::unordered_map<std::string, field>& dyn_fields,
-                                bool missing_is_ok, std::vector<field>& flattened_fields) {
+                                bool is_update, std::vector<field>& flattened_fields) {

    std::unordered_map<std::string, field> flattened_fields_map;

@ -534,12 +558,12 @@ Option<bool> field::flatten_doc(nlohmann::json& document,
        }

        auto op = flatten_field(document, document, nested_field, field_parts, 0, false, false,
-                                dyn_fields, flattened_fields_map);
+                                is_update, dyn_fields, flattened_fields_map);
        if(op.ok()) {
            continue;
        }

-        if(op.code() == 404 && (missing_is_ok || nested_field.optional)) {
+        if(op.code() == 404 && (is_update || nested_field.optional)) {
            continue;
        } else {
            return op;
@ -549,7 +573,10 @@ Option<bool> field::flatten_doc(nlohmann::json& document,
    document[".flat"] = nlohmann::json::array();
    for(auto& kv: flattened_fields_map) {
        document[".flat"].push_back(kv.second.name);
-        flattened_fields.push_back(kv.second);
+        if(kv.second.type != field_types::NIL) {
+            // NIL marks a null-valued placeholder, not a real field, so we won't add it
+            flattened_fields.push_back(kv.second);
+        }
    }

    return Option<bool>(true);
--- a/test/collection_nested_fields_test.cpp
+++ b/test/collection_nested_fields_test.cpp
@ -2560,6 +2560,131 @@ TEST_F(CollectionNestedFieldsTest, NullValuesWithExplicitSchema) {
    auto results = coll1->search("jack", {"name.first"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
    ASSERT_EQ(1, results["found"].get<size_t>());
    ASSERT_EQ(2, results["hits"][0]["document"].size());  // id, name
+    ASSERT_EQ(1, results["hits"][0]["document"]["name"].size());  // name.first
+    ASSERT_EQ("Jack", results["hits"][0]["document"]["name"]["first"].get<std::string>());
+}
+
+TEST_F(CollectionNestedFieldsTest, EmplaceWithNullValueOnRequiredField) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "enable_nested_fields": true,
+        "fields": [
+            {"name":"currency", "type":"object"},
+            {"name":"currency.eu", "type":"int32", "optional": false}
+        ]
+    })"_json;
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection *coll1 = op.get();
+
+    auto doc1 = R"({
+      "id": "0",
+      "currency": {
+        "eu": 12000
+      }
+    })"_json;
+
+    auto add_op = coll1->add(doc1.dump(), CREATE);
+    ASSERT_TRUE(add_op.ok());
+
+    // now update with null value -- should not be allowed since field is required
+    auto update_doc = R"({
+      "id": "0",
+      "currency": {
+        "eu": null
+      }
+    })"_json;
+
+    auto update_op = coll1->add(update_doc.dump(), EMPLACE);
+    ASSERT_FALSE(update_op.ok());
+    ASSERT_EQ("Field `currency.eu` must be an int32.", update_op.error());
+}
+
+TEST_F(CollectionNestedFieldsTest, EmplaceWithNullValueOnOptionalField) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "enable_nested_fields": true,
+        "fields": [
+            {"name":"currency", "type":"object"},
+            {"name":"currency.eu", "type":"int32", "optional": true}
+        ]
+    })"_json;
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection *coll1 = op.get();
+
+    auto doc1 = R"({
+      "id": "0",
+      "currency": {
+        "eu": 12000
+      }
+    })"_json;
+
+    auto add_op = coll1->add(doc1.dump(), CREATE);
+    ASSERT_TRUE(add_op.ok());
+
+    // now update with null value -- should be allowed since field is optional
+    auto update_doc = R"({
+      "id": "0",
+      "currency": {
+        "eu": null
+      }
+    })"_json;
+
+    auto update_op = coll1->add(update_doc.dump(), EMPLACE);
+    ASSERT_TRUE(update_op.ok());
+
+    // try to fetch the document to see the stored value
+    auto results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    ASSERT_EQ(2, results["hits"][0]["document"].size());  // id, currency
+    ASSERT_EQ(0, results["hits"][0]["document"]["currency"].size());
+}
+
+TEST_F(CollectionNestedFieldsTest, EmplaceWithMissingArrayValueOnOptionalField) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "enable_nested_fields": true,
+        "fields": [
+            {"name":"currency", "type":"object[]"},
+            {"name":"currency.eu", "type":"int32[]", "optional": true}
+        ]
+    })"_json;
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection *coll1 = op.get();
+
+    auto doc1 = R"({
+      "id": "0",
+      "currency": [
+        {"eu": 12000},
+        {"us": 10000}
+      ]
+    })"_json;
+
+    auto add_op = coll1->add(doc1.dump(), CREATE);
+    ASSERT_TRUE(add_op.ok());
+
+    // now update with the `eu` element omitted -- should be allowed since field is optional
+    auto update_doc = R"({
+      "id": "0",
+      "currency": [
+        {"us": 10000}
+      ]
+    })"_json;
+
+    auto update_op = coll1->add(update_doc.dump(), EMPLACE);
+    ASSERT_TRUE(update_op.ok());
+
+    // try to fetch the document to see the stored value
+    auto results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    ASSERT_EQ(2, results["hits"][0]["document"].size());  // id, currency
+    ASSERT_EQ(1, results["hits"][0]["document"]["currency"].size());
+    ASSERT_EQ(10000, results["hits"][0]["document"]["currency"][0]["us"].get<uint32_t>());
 }

 TEST_F(CollectionNestedFieldsTest, UpdateNestedDocument) {