diff --git a/TODO.md b/TODO.md index 1ce38d3b..6fcb2190 100644 --- a/TODO.md +++ b/TODO.md @@ -57,13 +57,14 @@ - ~~only last token should be prefix searched~~ - ~~Prefix-search strings should not be null terminated~~ - ~~sort results by float field~~ +- ~~json::parse must be wrapped in try catch~~ - Collection Manager collections map should store plain collection name - init_collection of Collection manager should probably take seq_id as param +- https support - Validate before string to int conversion in the http api layer - When field of "id" but not string, what happens? - Typo in prefix search - node score should be int32, no longer uint16 like in document struct -- json::parse must be wrapped in try catch - test for num_documents - test for string filter comparison: title < "foo" - test for token ranking on float field diff --git a/include/collection_manager.h b/include/collection_manager.h index d191d1e6..ea1a6b8e 100644 --- a/include/collection_manager.h +++ b/include/collection_manager.h @@ -42,12 +42,12 @@ public: CollectionManager(CollectionManager const&) = delete; void operator=(CollectionManager const&) = delete; - void init(Store *store, const std::string & auth_key); + Option init(Store *store, const std::string & auth_key); // frees in-memory data structures when server is shutdown - helps us run a memory leak detecter properly void dispose(); - Collection* init_collection(const std::string & collection_meta_json); + Option init_collection(const std::string & collection_meta_json); void add_to_collections(Collection* collection); diff --git a/include/replicator.h b/include/replicator.h index 0b76564c..514368fb 100644 --- a/include/replicator.h +++ b/include/replicator.h @@ -131,7 +131,16 @@ public: if(replication_event->type == "ADD_COLLECTION_META") { CollectionManager & collection_manager = CollectionManager::get_instance(); - Collection* collection = collection_manager.init_collection(replication_event->value); + Option collection_op = collection_manager.init_collection(replication_event->value); + + if(!collection_op.ok()) { + std::cerr << "Failed to initialize collection. Error: " << collection_op.error() << std::endl; + std::cerr << "Replication event value: " << replication_event->value << std::endl; + delete replication_event; + exit(1); + } + + Collection* collection = collection_op.get(); collection_manager.add_to_collections(collection); } diff --git a/src/api.cpp b/src/api.cpp index 319bfc56..6931a662 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -110,7 +110,8 @@ void post_create_collection(http_req & req, http_res & res) { "[{\"name\": \"\", \"type\": \"\"}]"); } - if(sort_field_json["type"] != field_types::INT32 && sort_field_json["type"] != field_types::INT64) { + const std::string & sort_field_type = sort_field_json["type"].get(); + if(sort_field_type != field_types::INT32 && sort_field_type != field_types::INT64) { return res.send_400("Sort field `" + sort_field_json["name"].get() + "` must be an integer."); } @@ -187,7 +188,7 @@ void get_search(http_req & req, http_res & res) { StringUtils::split(req.params[SEARCH_BY], search_fields, ","); std::vector facet_fields; - StringUtils::split(req.params[FACET_BY], facet_fields, "&&"); + StringUtils::split(req.params[FACET_BY], facet_fields, ","); std::vector sort_fields; if(req.params.count(SORT_BY) != 0) { diff --git a/src/collection.cpp b/src/collection.cpp index cc138d09..a7646464 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -791,10 +791,17 @@ Option Collection::search(std::string query, const std::vectorget(seq_id_key, value); - nlohmann::json document = nlohmann::json::parse(value); + + nlohmann::json document; + try { + document = nlohmann::json::parse(value); + } catch(...) { + return Option(500, "Error while parsing stored document."); + } // highlight query words in the result const std::string & field_name = search_fields[search_fields.size() - field_order_kv.first]; @@ -1258,7 +1265,13 @@ Option Collection::get(const std::string & id) { std::string parsed_document; store->get(get_seq_id_key(seq_id), parsed_document); - nlohmann::json document = nlohmann::json::parse(parsed_document); + nlohmann::json document; + try { + document = nlohmann::json::parse(parsed_document); + } catch(...) { + return Option(500, "Error while parsing stored document."); + } + return Option(document); } @@ -1275,7 +1288,12 @@ Option Collection::remove(const std::string & id, const bool remove std::string parsed_document; store->get(get_seq_id_key(seq_id), parsed_document); - nlohmann::json document = nlohmann::json::parse(parsed_document); + nlohmann::json document; + try { + document = nlohmann::json::parse(parsed_document); + } catch(...) { + return Option(500, "Error while parsing stored document."); + } for(auto & name_field: search_schema) { // Go through all the field names and find the keys+values so that they can be removed from in-memory index diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp index b0f3d7a3..ddfa19a0 100644 --- a/src/collection_manager.cpp +++ b/src/collection_manager.cpp @@ -8,8 +8,15 @@ CollectionManager::CollectionManager() { } -Collection* CollectionManager::init_collection(const std::string & collection_meta_json) { - nlohmann::json collection_meta = nlohmann::json::parse(collection_meta_json); +Option CollectionManager::init_collection(const std::string & collection_meta_json) { + nlohmann::json collection_meta; + + try { + collection_meta = nlohmann::json::parse(collection_meta_json); + } catch(...) { + return Option(500, "Error while parsing collection meta."); + } + std::string this_collection_name = collection_meta[COLLECTION_NAME_KEY].get(); std::vector search_fields; @@ -49,7 +56,7 @@ Collection* CollectionManager::init_collection(const std::string & collection_me collection_sort_fields, token_ranking_field); - return collection; + return Option(collection); } void CollectionManager::add_to_collections(Collection* collection) { @@ -57,7 +64,7 @@ void CollectionManager::add_to_collections(Collection* collection) { collection_id_names.emplace(collection->get_collection_id(), collection->get_name()); } -void CollectionManager::init(Store *store, const std::string & auth_key) { +Option CollectionManager::init(Store *store, const std::string & auth_key) { this->store = store; this->auth_key = auth_key; @@ -73,7 +80,13 @@ void CollectionManager::init(Store *store, const std::string & auth_key) { store->scan_fill(Collection::COLLECTION_META_PREFIX, collection_meta_jsons); for(auto & collection_meta_json: collection_meta_jsons) { - Collection* collection = init_collection(collection_meta_json); + Option collection_op = init_collection(collection_meta_json); + + if(!collection_op.ok()) { + return Option(collection_op.code(), collection_op.error()); + } + + Collection* collection = collection_op.get(); // Fetch records from the store and re-create memory index std::vector documents; @@ -82,7 +95,17 @@ void CollectionManager::init(Store *store, const std::string & auth_key) { while(iter->Valid() && iter->key().starts_with(seq_id_prefix)) { const std::string doc_json_str = iter->value().ToString(); - nlohmann::json document = nlohmann::json::parse(doc_json_str); + + nlohmann::json document; + try { + document = nlohmann::json::parse(doc_json_str); + } catch(...) { + delete iter; + return Option(500, + std::string("Error while parsing stored document from collection " + collection->get_name() + + " with key: ") + iter->key().ToString()); + } + uint32_t seq_id = collection->doc_id_to_seq_id(document["id"]); collection->index_in_memory(document, seq_id); iter->Next(); @@ -93,7 +116,7 @@ void CollectionManager::init(Store *store, const std::string & auth_key) { add_to_collections(collection); } - std::cout << "Finished restoring all collections from disk." << std::endl; + return Option(true); } void CollectionManager::dispose() { diff --git a/src/main/typesense_server.cpp b/src/main/typesense_server.cpp index d5e5ed11..a47ca196 100644 --- a/src/main/typesense_server.cpp +++ b/src/main/typesense_server.cpp @@ -36,7 +36,14 @@ int main(int argc, char **argv) { Store store(options.get("data-dir")); CollectionManager & collectionManager = CollectionManager::get_instance(); - collectionManager.init(&store, options.get("api-auth-key")); + Option init_op = collectionManager.init(&store, options.get("api-auth-key")); + + if(init_op.ok()) { + std::cout << "Finished restoring all collections from disk." << std::endl; + } else { + std::cerr << "Failed initializing collections from store..." << std::endl; + std::cerr << init_op.error() << std::endl; + } server = new HttpServer( options.get("listen-address"),