Use string* to specify string/string array coercion.

2025-05-21 22:33:27 +08:00 · 2021-02-25 15:07:37 +05:30 · 2021-02-25 15:07:37 +05:30 · 4e3307a891
commit 4e3307a891
parent 3a4d21992c
10 changed files with 155 additions and 72 deletions
--- a/include/collection.h
+++ b/include/collection.h
@ -323,7 +323,7 @@ private:

    const std::vector<Index*> indices;

-    const std::string auto_detect_schema;
+    const std::string fallback_field_type;

    // methods

@ -409,7 +409,7 @@ public:
    static constexpr const char* COLLECTION_DEFAULT_SORTING_FIELD_KEY = "default_sorting_field";
    static constexpr const char* COLLECTION_CREATED = "created_at";
    static constexpr const char* COLLECTION_NUM_MEMORY_SHARDS = "num_memory_shards";
-    static constexpr const char* COLLECTION_AUTO_DETECT_SCHEMA = "auto_detect_schema";
+    static constexpr const char* COLLECTION_FALLBACK_FIELD_TYPE = "fallback_field_type";

    // DON'T CHANGE THESE VALUES!
    // this key is used as namespace key to store metadata about the document
@ -423,7 +423,7 @@ public:
    Collection(const std::string& name, const uint32_t collection_id, const uint64_t created_at,
               const uint32_t next_seq_id, Store *store, const std::vector<field>& fields,
               const std::string& default_sorting_field, const size_t num_memory_shards,
-               const float max_memory_ratio, const std::string& auto_detect_schema);
+               const float max_memory_ratio, const std::string& fallback_field_type);

    ~Collection();

--- a/include/collection_manager.h
+++ b/include/collection_manager.h
@ -125,11 +125,15 @@ public:
    bool auth_key_matches(const std::string& auth_key_sent, const std::string& action,
                          const std::vector<std::string>& collections, std::map<std::string, std::string>& params) const;

+    static Option<Collection*> create_collection(nlohmann::json& req_json,
+                                          const size_t num_memory_shards,
+                                          const std::string & default_sorting_field);
+
    Option<Collection*> create_collection(const std::string& name, const size_t num_memory_shards,
                                          const std::vector<field> & fields,
                                          const std::string & default_sorting_field="",
                                          const uint64_t created_at = static_cast<uint64_t>(std::time(nullptr)),
-                                          const std::string& auto_detect_schema = schema_detect_types::OFF);
+                                          const std::string& fallback_field_type = "");

    locked_resource_view_t<Collection> get_collection(const std::string & collection_name) const;

--- a/include/field.h
+++ b/include/field.h
@ -7,6 +7,9 @@
 #include "json.hpp"

 namespace field_types {
+    // first field value indexed will determine the type
+    static const std::string AUTO = "auto";
+
    static const std::string STRING = "string";
    static const std::string INT32 = "int32";
    static const std::string INT64 = "int64";
@ -18,6 +21,10 @@ namespace field_types {
    static const std::string INT64_ARRAY = "int64[]";
    static const std::string FLOAT_ARRAY = "float[]";
    static const std::string BOOL_ARRAY = "bool[]";
+
+    // Returns true when `type_def` is the wildcard type `string*`, the type definition used to
+    // request string / string array coercion. Takes the argument by const reference so that a
+    // type check does not copy the string.
+    static bool is_string_or_array(const std::string& type_def) {
+        return type_def == "string*";
+    }
 }

 namespace fields {
@ -27,12 +34,6 @@ namespace fields {
    static const std::string optional = "optional";
 }

-namespace schema_detect_types {
-    static const std::string OFF = "off";
-    static const std::string STRINGIFY = "stringify";
-    static const std::string AUTO = "auto";
-}
-
 static const uint8_t DEFAULT_GEO_RESOLUTION = 7;
 static const uint8_t FINEST_GEO_RESOLUTION = 15;

@ -61,7 +62,7 @@ struct field {
    }

    bool is_auto() const {
-        return (type == schema_detect_types::AUTO || type == schema_detect_types::STRINGIFY);
+        return (type == field_types::AUTO);
    }

    bool is_single_integer() const {
@ -129,7 +130,11 @@ struct field {
    }

    bool has_valid_type() const {
-        return is_string() || is_integer() || is_float() || is_bool() || is_geopoint() || is_auto();
+        bool is_basic_type = is_string() || is_integer() || is_float() || is_bool() || is_geopoint() || is_auto();
+        if(!is_basic_type) {
+            return field_types::is_string_or_array(type);
+        }
+        return true;
    }

    std::string faceted_name() const {
@ -227,7 +232,7 @@ struct field {
    }

    static Option<bool> json_fields_to_fields(nlohmann::json& fields_json,
-                                              std::string& auto_detect_schema,
+                                              std::string& fallback_field_type,
                                              std::vector<field>& fields) {
        size_t num_auto_detect_fields = 0;

@ -251,13 +256,6 @@ struct field {
            }

            if(field_json["name"] == "*") {
-                if(field_json["type"] == schema_detect_types::AUTO || field_json["type"] == schema_detect_types::STRINGIFY) {
-                    auto_detect_schema = field_json["type"];
-                    num_auto_detect_fields++;
-                } else {
-                    return Option<bool>(400, "The `type` of field `*` is invalid.");
-                }
-
                if(field_json.count("facet") == 0) {
                    field_json["facet"] = false;
                }
@ -273,6 +271,19 @@ struct field {
                if(field_json["facet"] == true) {
                    return Option<bool>(400, "Field `*` cannot be a facet field.");
                }
+
+                field fallback_field(field_json["name"], field_json["type"], field_json["facet"],
+                                     field_json["optional"]);
+
+                if(fallback_field.has_valid_type()) {
+                    fallback_field_type = fallback_field.type;
+                    num_auto_detect_fields++;
+                } else {
+                    return Option<bool>(400, "The `type` of field `*` is invalid.");
+                }
+
+                fields.emplace_back(fallback_field);
+                continue;
            }

            if(field_json.count("facet") == 0) {
--- a/include/index.h
+++ b/include/index.h
@ -246,7 +246,7 @@ private:
    static void get_doc_changes(const nlohmann::json &document, nlohmann::json &old_doc,
                                nlohmann::json &new_doc, nlohmann::json &del_doc);

-    static Option<uint32_t> coerce_string(const DIRTY_VALUES& dirty_values, const std::string& auto_detect_schema,
+    static Option<uint32_t> coerce_string(const DIRTY_VALUES& dirty_values, const std::string& fallback_field_type,
                                          const field& a_field, nlohmann::json &document,
                                          const std::string &field_name,
                                          nlohmann::json::iterator& array_iter,
@ -378,7 +378,7 @@ public:
                                     const std::string & default_sorting_field,
                                     const std::unordered_map<std::string, field> & search_schema,
                                     const std::map<std::string, field> & facet_schema,
-                                     const std::string& auto_detect_schema);
+                                     const std::string& fallback_field_type);

    static void populate_token_positions(const std::vector<art_leaf *> &query_suggestion,
                                         const std::vector<uint32_t*>& leaf_to_indices,
@ -396,7 +396,7 @@ public:
                                                     const std::unordered_map<std::string, field> & search_schema,
                                                     const std::map<std::string, field> & facet_schema,
                                                     bool is_update,
-                                                     const std::string& auto_detect_schema,
+                                                     const std::string& fallback_field_type,
                                                     const DIRTY_VALUES& dirty_values);

    void refresh_schemas(const std::vector<field>& new_fields);
--- a/src/collection.cpp
+++ b/src/collection.cpp
@ -40,13 +40,13 @@ struct match_index_t {
 Collection::Collection(const std::string& name, const uint32_t collection_id, const uint64_t created_at,
                       const uint32_t next_seq_id, Store *store, const std::vector<field> &fields,
                       const std::string& default_sorting_field, const size_t num_memory_shards,
-                       const float max_memory_ratio, const std::string& auto_detect_schema):
+                       const float max_memory_ratio, const std::string& fallback_field_type):
        name(name), collection_id(collection_id), created_at(created_at),
        next_seq_id(next_seq_id), store(store),
        fields(fields), default_sorting_field(default_sorting_field),
        num_memory_shards(num_memory_shards),
        max_memory_ratio(max_memory_ratio),
-        indices(init_indices()), auto_detect_schema(auto_detect_schema) {
+        indices(init_indices()), fallback_field_type(fallback_field_type) {

    this->num_documents = 0;
 }
@ -233,8 +233,8 @@ nlohmann::json Collection::add_many(std::vector<std::string>& json_lines, nlohma
                get_document_from_store(get_seq_id_key(seq_id), record.old_doc);
            }

-            // if `auto_detect_schema` is enabled, we will have to update schema first before indexing
-            if(auto_detect_schema != schema_detect_types::OFF) {
+            // if `fallback_field_type` is enabled, we will have to update schema first before indexing
+            if(!fallback_field_type.empty()) {
                Option<bool> schema_change_op = check_and_update_schema(record.doc, dirty_values);
                if(!schema_change_op.ok()) {
                    record.index_failure(schema_change_op.code(), schema_change_op.error());
@ -354,7 +354,7 @@ Option<uint32_t> Collection::index_in_memory(nlohmann::json &document, uint32_t

    Option<uint32_t> validation_op = Index::validate_index_in_memory(document, seq_id, default_sorting_field,
                                                                     search_schema, facet_schema, is_update,
-                                                                     auto_detect_schema, dirty_values);
+                                                                     fallback_field_type, dirty_values);

    if(!validation_op.ok()) {
        return validation_op;
@ -382,7 +382,7 @@ size_t Collection::par_index_in_memory(std::vector<std::vector<index_record>> &
        CollectionManager::get_instance().get_thread_pool()->enqueue(
        [index, index_id, &num_indexed_vec, &iter_batch, this, &m_process, &num_processed, &cv_process]() {
            size_t num_indexed = Index::batch_memory_index(index, std::ref(iter_batch[index_id]), default_sorting_field,
-                                      search_schema, facet_schema, auto_detect_schema);
+                                      search_schema, facet_schema, fallback_field_type);
            std::unique_lock<std::mutex> lock(m_process);
            num_indexed_vec[index_id] = num_indexed;
            num_processed++;
@ -2273,13 +2273,21 @@ Option<bool> Collection::check_and_update_schema(nlohmann::json& document, const

            const std::string &fname = kv.key();
            field new_field(fname, field_type, false, true);
-            if (auto_detect_schema == schema_detect_types::STRINGIFY) {
-                if (new_field.is_array()) {
-                    new_field.type = field_types::STRING_ARRAY;
-                } else {
-                    new_field.type = field_types::STRING;
+
+            if(!fallback_field_type.empty()) {
+                if (field_types::is_string_or_array(fallback_field_type)) {
+                    // Supporting single/array field detection only for strings, as it does not seem to be too useful for
+                    // other field types.
+                    if (new_field.is_array()) {
+                        new_field.type = field_types::STRING_ARRAY;
+                    } else {
+                        new_field.type = field_types::STRING;
+                    }
+                } else if(fallback_field_type != field_types::AUTO) {
+                    new_field.type = fallback_field_type;
                }
            }
+
            search_schema.emplace(fname, new_field);
            fields.emplace_back(new_field);
            new_fields.emplace_back(new_field);
@ -2364,8 +2372,7 @@ DIRTY_VALUES Collection::parse_dirty_values_option(std::string& dirty_values) co
    if(dirty_values_op.has_value()) {
        dirty_values_action = dirty_values_op.value();
    } else {
-        dirty_values_action = (auto_detect_schema == schema_detect_types::OFF) ? DIRTY_VALUES::REJECT
-                                                                               : DIRTY_VALUES::COERCE_OR_REJECT;
+        dirty_values_action = fallback_field_type.empty() ? DIRTY_VALUES::REJECT : DIRTY_VALUES::COERCE_OR_REJECT;
    }

    return dirty_values_action;
--- a/src/collection_manager.cpp
+++ b/src/collection_manager.cpp
@ -38,9 +38,9 @@ Collection* CollectionManager::init_collection(const nlohmann::json & collection
                               collection_meta[Collection::COLLECTION_NUM_MEMORY_SHARDS].get<size_t>() :
                               DEFAULT_NUM_MEMORY_SHARDS;

-    std::string auto_detect_schema = collection_meta.count(Collection::COLLECTION_AUTO_DETECT_SCHEMA) != 0 ?
-                              collection_meta[Collection::COLLECTION_AUTO_DETECT_SCHEMA].get<std::string>() :
-                              schema_detect_types::OFF;
+    std::string fallback_field_type = collection_meta.count(Collection::COLLECTION_FALLBACK_FIELD_TYPE) != 0 ?
+                              collection_meta[Collection::COLLECTION_FALLBACK_FIELD_TYPE].get<std::string>() :
+                              "";

    LOG(INFO) << "Found collection " << this_collection_name << " with " << num_memory_shards << " memory shards.";

@ -53,7 +53,7 @@ Collection* CollectionManager::init_collection(const nlohmann::json & collection
                                            default_sorting_field,
                                            num_memory_shards,
                                            max_memory_ratio,
-                                            auto_detect_schema);
+                                            fallback_field_type);

    return collection;
 }
@ -315,13 +315,21 @@ Option<Collection*> CollectionManager::create_collection(const std::string& name
                                                         const std::vector<field> & fields,
                                                         const std::string& default_sorting_field,
                                                         const uint64_t created_at,
-                                                         const std::string& auto_detect_schema) {
+                                                         const std::string& fallback_field_type) {
    std::unique_lock lock(mutex);

    if(store->contains(Collection::get_meta_key(name))) {
        return Option<Collection*>(409, std::string("A collection with name `") + name + "` already exists.");
    }

+    // validate `fallback_field_type`
+    if(!fallback_field_type.empty()) {
+        field fallback_field_type_def("temp", fallback_field_type, false);
+        if(!fallback_field_type_def.has_valid_type()) {
+            return Option<Collection*>(400, std::string("Field `*` has an invalid type."));
+        }
+    }
+
    nlohmann::json fields_json = nlohmann::json::array();;

    Option<bool> fields_json_op = field::fields_to_json_fields(fields, default_sorting_field, fields_json);
@ -337,11 +345,11 @@ Option<Collection*> CollectionManager::create_collection(const std::string& name
    collection_meta[Collection::COLLECTION_DEFAULT_SORTING_FIELD_KEY] = default_sorting_field;
    collection_meta[Collection::COLLECTION_CREATED] = created_at;
    collection_meta[Collection::COLLECTION_NUM_MEMORY_SHARDS] = num_memory_shards;
-    collection_meta[Collection::COLLECTION_AUTO_DETECT_SCHEMA] = auto_detect_schema;
+    collection_meta[Collection::COLLECTION_FALLBACK_FIELD_TYPE] = fallback_field_type;

    Collection* new_collection = new Collection(name, next_collection_id, created_at, 0, store, fields,
                                                default_sorting_field, num_memory_shards,
-                                                this->max_memory_ratio, auto_detect_schema);
+                                                this->max_memory_ratio, fallback_field_type);
    next_collection_id++;

    rocksdb::WriteBatch batch;
@ -835,3 +843,21 @@ nlohmann::json CollectionManager::get_collection_summaries() const {

    return json_summaries;
 }
+
+// Creates a collection from a JSON request body.
+//
+// `req_json["fields"]` is parsed by `field::json_fields_to_fields`, which fills `fields` and records
+// the type of the `*` field (if any) in `fallback_field_type`. On a parse failure the parser's
+// error code and message are returned as-is. Otherwise the call is delegated to the singleton
+// instance's `create_collection` overload, using the current time as `created_at`.
+//
+// NOTE(review): `req_json["name"]` and `req_json["fields"]` are read without checks here --
+// presumably the caller has already validated them; confirm against callers.
+Option<Collection*> CollectionManager::create_collection(nlohmann::json& req_json,
+                                                         const size_t num_memory_shards,
+                                                         const std::string& default_sorting_field) {
+    std::string fallback_field_type;
+    std::vector<field> fields;
+    auto parse_op = field::json_fields_to_fields(req_json["fields"], fallback_field_type, fields);
+
+    if(!parse_op.ok()) {
+        return Option<Collection*>(parse_op.code(), parse_op.error());
+    }
+
+    const auto created_at = static_cast<uint64_t>(std::time(nullptr));
+
+    // explicit conversion instead of relying on the implicit json -> std::string conversion
+    return CollectionManager::get_instance().create_collection(req_json["name"].get<std::string>(),
+                                                                num_memory_shards,
+                                                                fields, default_sorting_field, created_at,
+                                                                fallback_field_type);
+}
--- a/src/core_api.cpp
+++ b/src/core_api.cpp
@ -121,7 +121,8 @@ bool post_create_collection(http_req & req, http_res & res) {
        return false;
    }

-    if(req_json[NUM_MEMORY_SHARDS].get<size_t>() == 0) {
+    size_t num_memory_shards = req_json[NUM_MEMORY_SHARDS].get<size_t>();
+    if(num_memory_shards == 0) {
        res.set_400(std::string("`") + NUM_MEMORY_SHARDS + "` should be a positive integer.");
        return false;
    }
@ -139,9 +140,9 @@ bool post_create_collection(http_req & req, http_res & res) {
        return false;
    }

-    std::string auto_detect_schema = schema_detect_types::OFF;
+    std::string fallback_field_type;
    std::vector<field> fields;
-    auto parse_op = field::json_fields_to_fields(req_json["fields"], auto_detect_schema, fields);
+    auto parse_op = field::json_fields_to_fields(req_json["fields"], fallback_field_type, fields);

    if(!parse_op.ok()) {
        res.set(parse_op.code(), parse_op.error());
@ -149,11 +150,8 @@ bool post_create_collection(http_req & req, http_res & res) {
    }

    const std::string & default_sorting_field = req_json[DEFAULT_SORTING_FIELD].get<std::string>();
-    const auto created_at = static_cast<uint64_t>(std::time(nullptr));
-
    const Option<Collection*> & collection_op =
-            collectionManager.create_collection(req_json["name"], req_json[NUM_MEMORY_SHARDS].get<size_t>(),
-            fields, default_sorting_field, created_at, auto_detect_schema);
+            collectionManager.create_collection(req_json, num_memory_shards, default_sorting_field);

    if(collection_op.ok()) {
        nlohmann::json json_response = collection_op.get()->get_summary_json();
--- a/src/index.cpp
+++ b/src/index.cpp
@ -273,7 +273,7 @@ Option<uint32_t> Index::validate_index_in_memory(nlohmann::json& document, uint3
                                                 const std::unordered_map<std::string, field> & search_schema,
                                                 const std::map<std::string, field> & facet_schema,
                                                 bool is_update,
-                                                 const std::string& auto_detect_schema,
+                                                 const std::string& fallback_field_type,
                                                 const DIRTY_VALUES& dirty_values) {

    bool missing_default_sort_field = (!default_sorting_field.empty() && document.count(default_sorting_field) == 0);
@ -300,7 +300,7 @@ Option<uint32_t> Index::validate_index_in_memory(nlohmann::json& document, uint3
        bool array_ele_erased = false;

        if(a_field.type == field_types::STRING && !document[field_name].is_string()) {
-            Option<uint32_t> coerce_op = coerce_string(dirty_values, auto_detect_schema, a_field, document, field_name, dummy_iter, false, array_ele_erased);
+            Option<uint32_t> coerce_op = coerce_string(dirty_values, fallback_field_type, a_field, document, field_name, dummy_iter, false, array_ele_erased);
            if(!coerce_op.ok()) {
                return coerce_op;
            }
@ -353,7 +353,7 @@ Option<uint32_t> Index::validate_index_in_memory(nlohmann::json& document, uint3
                array_ele_erased = false;

                if (a_field.type == field_types::STRING_ARRAY && !item.is_string()) {
-                    Option<uint32_t> coerce_op = coerce_string(dirty_values, auto_detect_schema, a_field, document, field_name, it, true, array_ele_erased);
+                    Option<uint32_t> coerce_op = coerce_string(dirty_values, fallback_field_type, a_field, document, field_name, it, true, array_ele_erased);
                    if (!coerce_op.ok()) {
                        return coerce_op;
                    }
@ -470,7 +470,7 @@ size_t Index::batch_memory_index(Index *index, std::vector<index_record> & iter_
                                 const std::string & default_sorting_field,
                                 const std::unordered_map<std::string, field> & search_schema,
                                 const std::map<std::string, field> & facet_schema,
-                                 const std::string& auto_detect_schema) {
+                                 const std::string& fallback_field_type) {

    size_t num_indexed = 0;

@ -485,7 +485,7 @@ size_t Index::batch_memory_index(Index *index, std::vector<index_record> & iter_
                                                                      default_sorting_field,
                                                                      search_schema, facet_schema,
                                                                      index_rec.is_update,
-                                                                      auto_detect_schema,
+                                                                      fallback_field_type,
                                                                      index_rec.dirty_values);

            if(!validation_op.ok()) {
@ -2399,7 +2399,7 @@ void Index::refresh_schemas(const std::vector<field>& new_fields) {
    }
 }

-Option<uint32_t> Index::coerce_string(const DIRTY_VALUES& dirty_values, const std::string& auto_detect_schema,
+Option<uint32_t> Index::coerce_string(const DIRTY_VALUES& dirty_values, const std::string& fallback_field_type,
                                      const field& a_field, nlohmann::json &document,
                                      const std::string &field_name, nlohmann::json::iterator& array_iter,
                                      bool is_array, bool& array_ele_erased) {
--- a/test/collection_all_fields_test.cpp
+++ b/test/collection_all_fields_test.cpp
@ -42,9 +42,14 @@ TEST_F(CollectionAllFieldsTest, IndexDocsWithoutSchema) {

    std::vector<sort_by> sort_fields = { sort_by("points", "DESC") };

+    // try to create collection with random fallback field type
+    auto bad_coll_op = collectionManager.create_collection("coll_bad", 1, fields, "", 0, "blah");
+    ASSERT_FALSE(bad_coll_op.ok());
+    ASSERT_EQ("Field `*` has an invalid type.", bad_coll_op.error());
+
    coll1 = collectionManager.get_collection("coll1").get();
    if(coll1 == nullptr) {
-        auto coll_op = collectionManager.create_collection("coll1", 1, fields, "", 0, schema_detect_types::AUTO);
+        auto coll_op = collectionManager.create_collection("coll1", 1, fields, "", 0, field_types::AUTO);
        coll1 = coll_op.get();
    }

@ -167,7 +172,7 @@ TEST_F(CollectionAllFieldsTest, HandleArrayTypes) {

    coll1 = collectionManager.get_collection("coll1").get();
    if(coll1 == nullptr) {
-        coll1 = collectionManager.create_collection("coll1", 1, {}, "", 0, schema_detect_types::AUTO).get();
+        coll1 = collectionManager.create_collection("coll1", 1, {}, "", 0, field_types::AUTO).get();
    }

    nlohmann::json doc;
@ -255,7 +260,7 @@ TEST_F(CollectionAllFieldsTest, ShouldBeAbleToUpdateSchemaDetectedDocs) {

    coll1 = collectionManager.get_collection("coll1").get();
    if (coll1 == nullptr) {
-        coll1 = collectionManager.create_collection("coll1", 4, fields, "", 0, schema_detect_types::AUTO).get();
+        coll1 = collectionManager.create_collection("coll1", 4, fields, "", 0, field_types::AUTO).get();
    }

    nlohmann::json doc;
@ -337,7 +342,7 @@ TEST_F(CollectionAllFieldsTest, StringifyAllValues) {

    coll1 = collectionManager.get_collection("coll1").get();
    if (coll1 == nullptr) {
-        coll1 = collectionManager.create_collection("coll1", 1, {}, "", 0, schema_detect_types::STRINGIFY).get();
+        coll1 = collectionManager.create_collection("coll1", 1, {}, "", 0, "string*").get();
    }

    nlohmann::json doc;
@ -420,12 +425,44 @@ TEST_F(CollectionAllFieldsTest, StringifyAllValues) {
    collectionManager.drop_collection("coll1");
 }

+// With `string` as the fallback field type, an array value is rejected for a newly seen field,
+// while a singular non-string value is coerced to a string.
+TEST_F(CollectionAllFieldsTest, StringSingularAllValues) {
+    Collection *coll1;
+
+    coll1 = collectionManager.get_collection("coll1").get();
+    if (coll1 == nullptr) {
+        coll1 = collectionManager.create_collection("coll1", 1, {}, "", 0, "string").get();
+    }
+
+    nlohmann::json doc;
+    doc["title"] = "FIRST";
+    doc["int_values"] = {1, 2};
+
+    // an array cannot be stored in a field whose fallback type is a singular string
+    Option<nlohmann::json> add_op = coll1->add(doc.dump(), CREATE, "0");
+    ASSERT_FALSE(add_op.ok());
+    ASSERT_EQ("Field `int_values` must be a string.", add_op.error());
+
+    doc["int_values"] = 123;
+
+    // a singular integer is accepted and comes back as a string
+    add_op = coll1->add(doc.dump(), CREATE, "0");
+    ASSERT_TRUE(add_op.ok());
+
+    auto added_doc = add_op.get();
+
+    ASSERT_EQ("FIRST", added_doc["title"].get<std::string>());
+    ASSERT_EQ("123", added_doc["int_values"].get<std::string>());
+
+    auto results = coll1->search("first", {"title"}, "", {}, sort_fields, 0, 10, 1, FREQUENCY, false).get();
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ("FIRST", results["hits"][0]["document"]["title"].get<std::string>());
+    ASSERT_EQ("123", results["hits"][0]["document"]["int_values"].get<std::string>());
+
+    // drop the collection so that a later test asking for `coll1` does not pick up this one
+    collectionManager.drop_collection("coll1");
+}
+
 TEST_F(CollectionAllFieldsTest, UpdateOfDocumentsInAutoMode) {
    Collection *coll1;

    coll1 = collectionManager.get_collection("coll1").get();
    if (coll1 == nullptr) {
-        coll1 = collectionManager.create_collection("coll1", 1, {}, "", 0, schema_detect_types::AUTO).get();
+        coll1 = collectionManager.create_collection("coll1", 1, {}, "", 0, field_types::AUTO).get();
    }

    nlohmann::json doc;
@ -449,32 +486,32 @@ TEST_F(CollectionAllFieldsTest, JsonFieldsToFieldsConversion) {
    nlohmann::json fields_json = nlohmann::json::array();
    nlohmann::json all_field;
    all_field[fields::name] = "*";
-    all_field[fields::type] = "stringify";
+    all_field[fields::type] = "string*";
    fields_json.emplace_back(all_field);

-    std::string auto_detect_schema;
+    std::string fallback_field_type;
    std::vector<field> fields;

-    auto parse_op = field::json_fields_to_fields(fields_json, auto_detect_schema, fields);
+    auto parse_op = field::json_fields_to_fields(fields_json, fallback_field_type, fields);

    ASSERT_TRUE(parse_op.ok());
    ASSERT_EQ(1, fields.size());
-    ASSERT_EQ("stringify", auto_detect_schema);
+    ASSERT_EQ("string*", fallback_field_type);
    ASSERT_EQ(true, fields[0].optional);
    ASSERT_EQ(false, fields[0].facet);
    ASSERT_EQ("*", fields[0].name);
-    ASSERT_EQ("stringify", fields[0].type);
+    ASSERT_EQ("string*", fields[0].type);

    // reject when you try to set optional to false or facet to true
    fields_json[0][fields::optional] = false;
-    parse_op = field::json_fields_to_fields(fields_json, auto_detect_schema, fields);
+    parse_op = field::json_fields_to_fields(fields_json, fallback_field_type, fields);

    ASSERT_FALSE(parse_op.ok());
    ASSERT_EQ("Field `*` must be an optional field.", parse_op.error());

    fields_json[0][fields::optional] = true;
    fields_json[0][fields::facet] = true;
-    parse_op = field::json_fields_to_fields(fields_json, auto_detect_schema, fields);
+    parse_op = field::json_fields_to_fields(fields_json, fallback_field_type, fields);

    ASSERT_FALSE(parse_op.ok());
    ASSERT_EQ("Field `*` cannot be a facet field.", parse_op.error());
@ -484,7 +521,7 @@ TEST_F(CollectionAllFieldsTest, JsonFieldsToFieldsConversion) {
    // can have only one "*" field
    fields_json.emplace_back(all_field);

-    parse_op = field::json_fields_to_fields(fields_json, auto_detect_schema, fields);
+    parse_op = field::json_fields_to_fields(fields_json, fallback_field_type, fields);

    ASSERT_FALSE(parse_op.ok());
    ASSERT_EQ("There can be only one field named `*`.", parse_op.error());
@ -495,7 +532,7 @@ TEST_F(CollectionAllFieldsTest, JsonFieldsToFieldsConversion) {
    all_field[fields::type] = "auto";
    fields_json.emplace_back(all_field);

-    parse_op = field::json_fields_to_fields(fields_json, auto_detect_schema, fields);
+    parse_op = field::json_fields_to_fields(fields_json, fallback_field_type, fields);
    ASSERT_TRUE(parse_op.ok());
    ASSERT_EQ("auto", fields[0].type);
 }
--- a/test/collection_manager_test.cpp
+++ b/test/collection_manager_test.cpp
@ -82,7 +82,7 @@ TEST_F(CollectionManagerTest, CollectionCreation) {
    ASSERT_EQ(3, num_keys);
    // we already call `collection1->get_next_seq_id` above, which is side-effecting
    ASSERT_EQ(1, StringUtils::deserialize_uint32_t(next_seq_id));
-    ASSERT_EQ("{\"auto_detect_schema\":\"off\",\"created_at\":12345,\"default_sorting_field\":\"points\","
+    ASSERT_EQ("{\"created_at\":12345,\"default_sorting_field\":\"points\",\"fallback_field_type\":\"\","
              "\"fields\":[{\"facet\":false,\"name\":\"title\",\"optional\":false,\"type\":\"string\"},"
              "{\"facet\":false,\"name\":\"starring\",\"optional\":false,\"type\":\"string\"},"
              "{\"facet\":true,\"name\":\"cast\",\"optional\":true,\"type\":\"string[]\"},"
@ -288,7 +288,7 @@ TEST_F(CollectionManagerTest, RestoreAutoSchemaDocsOnRestart) {

    coll1 = collectionManager.get_collection("coll1").get();
    if(coll1 == nullptr) {
-        coll1 = collectionManager.create_collection("coll1", 1, fields, "max", 0, schema_detect_types::AUTO).get();
+        coll1 = collectionManager.create_collection("coll1", 1, fields, "max", 0, field_types::AUTO).get();
    }

    std::string json_line;