Allow fields to be marked as optional in the schema.

Downside: optional fields cannot be used for sorting or marked as the default sorting field.
2025-05-15 10:42:29 +08:00 · 2020-03-05 21:56:05 +05:30 · 2020-03-05 21:56:05 +05:30 · 6c8e62a61b
commit 6c8e62a61b
parent ba17243a36
8 changed files with 110 additions and 17 deletions
--- a/include/field.h
+++ b/include/field.h
@ -28,8 +28,15 @@ struct field {
    std::string name;
    std::string type;
    bool facet;
+    bool optional;

-    field(const std::string & name, const std::string & type, const bool & facet): name(name), type(type), facet(facet) {
+    field(const std::string & name, const std::string & type, const bool facet):
+        name(name), type(type), facet(facet), optional(false) {
+
+    }
+
+    field(const std::string & name, const std::string & type, const bool facet, const bool optional):
+            name(name), type(type), facet(facet), optional(optional) {

    }

--- a/src/collection.cpp
+++ b/src/collection.cpp
@ -513,6 +513,11 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
            return Option<nlohmann::json>(404, error);
        }

+        if(sort_schema.count(_sort_field.name) != 0 && sort_schema.at(_sort_field.name).optional) {
+            std::string error = "Cannot sort by `" + _sort_field.name + "` as it is defined as an optional field.";
+            return Option<nlohmann::json>(400, error);
+        }
+
        std::string sort_order = _sort_field.order;
        StringUtils::toupper(sort_order);

--- a/src/collection_manager.cpp
+++ b/src/collection_manager.cpp
@ -227,6 +227,11 @@ Option<Collection*> CollectionManager::create_collection(const std::string name,
        }

        if(field.name == default_sorting_field) {
+            if(field.optional) {
+                return Option<Collection*>(400, "Default sorting field `" + default_sorting_field +
+                                                "` cannot be an optional field.");
+            }
+
            found_default_sorting_field = true;
        }
    }
--- a/src/core_api.cpp
+++ b/src/core_api.cpp
@ -114,8 +114,12 @@ void post_create_collection(http_req & req, http_res & res) {
            field_json["facet"] = false;
        }

-        fields.push_back(
-            field(field_json["name"], field_json["type"], field_json["facet"])
+        if(field_json.count("optional") == 0) {
+            field_json["optional"] = false;
+        }
+
+        fields.emplace_back(
+            field(field_json["name"], field_json["type"], field_json["facet"], field_json["optional"])
        );
    }

--- a/src/index.cpp
+++ b/src/index.cpp
@ -103,6 +103,10 @@ Option<uint32_t> Index::index_in_memory(const nlohmann::json &document, uint32_t
    for(const std::pair<std::string, field> & field_pair: search_schema) {
        const std::string & field_name = field_pair.first;

+        if(field_pair.second.optional && document.count(field_name) == 0) {
+            continue;
+        }
+
        int facet_id = -1;
        if(facet_schema.count(field_name) != 0) {
            facet_id = facet_to_id[field_name];
@ -230,9 +234,13 @@ Option<uint32_t> Index::validate_index_in_memory(const nlohmann::json &document,
    for(const std::pair<std::string, field> & field_pair: search_schema) {
        const std::string & field_name = field_pair.first;

+        if(field_pair.second.optional && document.count(field_name) == 0) {
+            continue;
+        }
+
        if(document.count(field_name) == 0) {
            return Option<>(400, "Field `" + field_name  + "` has been declared in the schema, "
-                    "but is not found in the document.");
+                                 "but is not found in the document.");
        }

        if(field_pair.second.type == field_types::STRING) {
@ -301,16 +309,6 @@ Option<uint32_t> Index::validate_index_in_memory(const nlohmann::json &document,
        }
    }

-    // since every facet field has to be a search field, we don't have to revalidate types here
-    for(const std::pair<std::string, field> & field_pair: facet_schema) {
-        const std::string & field_name = field_pair.first;
-
-        if(document.count(field_name) == 0) {
-            return Option<>(400, "Field `" + field_name  + "` has been declared as a facet field in the schema, "
-                    "but is not found in the document.");
-        }
-    }
-
    return Option<>(200);
 }

--- a/test/collection_sorting_test.cpp
+++ b/test/collection_sorting_test.cpp
@ -225,8 +225,6 @@ TEST_F(CollectionSortingTest, ThreeSortFieldsLimit) {
                                 field("max", field_types::INT32, false),
                                 };

-    std::vector<sort_by> sort_fields = { sort_by("points", "DESC") };
-
    coll1 = collectionManager.get_collection("coll1");
    if(coll1 == nullptr) {
        coll1 = collectionManager.create_collection("coll1", fields, "points").get();
@ -250,7 +248,7 @@ TEST_F(CollectionSortingTest, ThreeSortFieldsLimit) {
        sort_by("min", "DESC"),
    };

-    auto res_op = coll1->search("Jeremy", query_fields, "", {}, sort_fields_desc, 0, 10, 1, FREQUENCY, false);
+    auto res_op = coll1->search("the", query_fields, "", {}, sort_fields_desc, 0, 10, 1, FREQUENCY, false);

    ASSERT_FALSE(res_op.ok());
    ASSERT_STREQ("Only upto 3 sort_by fields can be specified.", res_op.error().c_str());
--- a/test/collection_test.cpp
+++ b/test/collection_test.cpp
@ -2290,3 +2290,73 @@ TEST_F(CollectionTest, SearchHighlightFieldFully) {

    collectionManager.drop_collection("coll1");
 }
+
+TEST_F(CollectionTest, OptionalFields) {
+    Collection *coll1;
+
+    std::vector<field> fields = {
+        field("title", field_types::STRING, false),
+        field("description", field_types::STRING, true, true),
+        field("max", field_types::INT32, false),
+        field("scores", field_types::INT64_ARRAY, false, true),
+        field("average", field_types::FLOAT, false, true),
+        field("is_valid", field_types::BOOL, false, true),
+    };
+
+    coll1 = collectionManager.get_collection("coll1");
+    if(coll1 == nullptr) {
+        coll1 = collectionManager.create_collection("coll1", fields, "max").get();
+    }
+
+    std::ifstream infile(std::string(ROOT_DIR)+"test/optional_fields.jsonl");
+
+    std::string json_line;
+
+    while (std::getline(infile, json_line)) {
+        auto add_op = coll1->add(json_line);
+        if(!add_op.ok()) {
+            std::cout << add_op.error() << std::endl;
+        }
+        ASSERT_TRUE(add_op.ok());
+    }
+
+    infile.close();
+
+    // first, we must be able to fetch all records (i.e. all must have been indexed)
+
+    auto res = coll1->search("*", {"title"}, "", {}, {}, 0, 10, 1, FREQUENCY, false).get();
+    ASSERT_EQ(6, res["found"].get<size_t>());
+
+    // search on optional `description` field
+    res = coll1->search("book", {"description"}, "", {}, {}, 0, 10, 1, FREQUENCY, false).get();
+    ASSERT_EQ(5, res["found"].get<size_t>());
+
+    // filter on optional `average` field
+    res = coll1->search("the", {"title"}, "average: >0", {}, {}, 0, 10, 1, FREQUENCY, false).get();
+    ASSERT_EQ(5, res["found"].get<size_t>());
+
+    // facet on optional `description` field
+    res = coll1->search("the", {"title"}, "", {"description"}, {}, 0, 10, 1, FREQUENCY, false).get();
+    ASSERT_EQ(6, res["found"].get<size_t>());
+    ASSERT_EQ(5, res["facet_counts"][0]["counts"][0]["count"].get<size_t>());
+    ASSERT_STREQ("description", res["facet_counts"][0]["field_name"].get<std::string>().c_str());
+
+    // sort_by optional `average` field should be rejected
+    std::vector<sort_by> sort_fields = { sort_by("average", "DESC") };
+    auto res_op = coll1->search("*", {"title"}, "", {}, sort_fields, 0, 10, 1, FREQUENCY, false);
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_STREQ("Cannot sort by `average` as it is defined as an optional field.", res_op.error().c_str());
+
+    // default sorting field should not be declared optional
+    fields = {
+        field("title", field_types::STRING, false),
+        field("score", field_types::INT32, false, true),
+    };
+
+    auto create_op = collectionManager.create_collection("coll2", fields, "score");
+
+    ASSERT_FALSE(create_op.ok());
+    ASSERT_STREQ("Default sorting field `score` cannot be an optional field.", create_op.error().c_str());
+
+    collectionManager.drop_collection("coll1");
+}
--- a/test/optional_fields.jsonl
+++ b/test/optional_fields.jsonl
@ -0,0 +1,6 @@
+{"title": "The quick brown fox.", "description": "A book.", "max": 25, "scores": [10, 25, 15], "average": 16.66, "is_valid": true}
+{"title": "The quick brown fox.", "max": 25, "scores": [10, 25, 15], "average": 16.66, "is_valid": true}
+{"title": "The quick brown fox.", "description": "A book.", "max": 25, "scores": [10, 25, 15], "average": 16.66, "is_valid": true}
+{"title": "The quick brown fox.", "description": "A book.", "max": 25, "average": 16.66, "is_valid": true}
+{"title": "The quick brown fox.", "description": "A book.", "max": 25, "scores": [10, 25, 15], "is_valid": true}
+{"title": "The quick brown fox.", "description": "A book.", "max": 25, "scores": [10, 25, 15], "average": 16.66}