Defensive checks against JSON parse failures.

This commit is contained in:
Kishore Nallan 2017-09-21 06:22:11 +05:30
parent 901626652a
commit 9c1a8a2364
7 changed files with 77 additions and 18 deletions

View File

@ -57,13 +57,14 @@
- ~~only last token should be prefix searched~~
- ~~Prefix-search strings should not be null terminated~~
- ~~sort results by float field~~
- ~~json::parse must be wrapped in try catch~~
- Collection Manager collections map should store plain collection name
- init_collection of Collection manager should probably take seq_id as param
- https support
- Validate before string to int conversion in the http api layer
- When field of "id" but not string, what happens?
- Typo in prefix search
- node score should be int32, no longer uint16 like in document struct
- json::parse must be wrapped in try catch
- test for num_documents
- test for string filter comparison: title < "foo"
- test for token ranking on float field

View File

@ -42,12 +42,12 @@ public:
CollectionManager(CollectionManager const&) = delete;
void operator=(CollectionManager const&) = delete;
void init(Store *store, const std::string & auth_key);
Option<bool> init(Store *store, const std::string & auth_key);
// frees in-memory data structures when server is shutdown - helps us run a memory leak detecter properly
void dispose();
Collection* init_collection(const std::string & collection_meta_json);
Option<Collection*> init_collection(const std::string & collection_meta_json);
void add_to_collections(Collection* collection);

View File

@ -131,7 +131,16 @@ public:
if(replication_event->type == "ADD_COLLECTION_META") {
CollectionManager & collection_manager = CollectionManager::get_instance();
Collection* collection = collection_manager.init_collection(replication_event->value);
Option<Collection*> collection_op = collection_manager.init_collection(replication_event->value);
if(!collection_op.ok()) {
std::cerr << "Failed to initialize collection. Error: " << collection_op.error() << std::endl;
std::cerr << "Replication event value: " << replication_event->value << std::endl;
delete replication_event;
exit(1);
}
Collection* collection = collection_op.get();
collection_manager.add_to_collections(collection);
}

View File

@ -110,7 +110,8 @@ void post_create_collection(http_req & req, http_res & res) {
"[{\"name\": \"<field_name>\", \"type\": \"<field_type>\"}]");
}
if(sort_field_json["type"] != field_types::INT32 && sort_field_json["type"] != field_types::INT64) {
const std::string & sort_field_type = sort_field_json["type"].get<std::string>();
if(sort_field_type != field_types::INT32 && sort_field_type != field_types::INT64) {
return res.send_400("Sort field `" + sort_field_json["name"].get<std::string>() + "` must be an integer.");
}
@ -187,7 +188,7 @@ void get_search(http_req & req, http_res & res) {
StringUtils::split(req.params[SEARCH_BY], search_fields, ",");
std::vector<std::string> facet_fields;
StringUtils::split(req.params[FACET_BY], facet_fields, "&&");
StringUtils::split(req.params[FACET_BY], facet_fields, ",");
std::vector<sort_by> sort_fields;
if(req.params.count(SORT_BY) != 0) {

View File

@ -791,10 +791,17 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
for(size_t field_order_kv_index = start_result_index; field_order_kv_index <= end_result_index; field_order_kv_index++) {
const auto & field_order_kv = field_order_kvs[field_order_kv_index];
const std::string& seq_id_key = get_seq_id_key((uint32_t) field_order_kv.second.key);
std::string value;
const std::string &seq_id_key = get_seq_id_key((uint32_t) field_order_kv.second.key);
store->get(seq_id_key, value);
nlohmann::json document = nlohmann::json::parse(value);
nlohmann::json document;
try {
document = nlohmann::json::parse(value);
} catch(...) {
return Option<nlohmann::json>(500, "Error while parsing stored document.");
}
// highlight query words in the result
const std::string & field_name = search_fields[search_fields.size() - field_order_kv.first];
@ -1258,7 +1265,13 @@ Option<nlohmann::json> Collection::get(const std::string & id) {
std::string parsed_document;
store->get(get_seq_id_key(seq_id), parsed_document);
nlohmann::json document = nlohmann::json::parse(parsed_document);
nlohmann::json document;
try {
document = nlohmann::json::parse(parsed_document);
} catch(...) {
return Option<nlohmann::json>(500, "Error while parsing stored document.");
}
return Option<nlohmann::json>(document);
}
@ -1275,7 +1288,12 @@ Option<std::string> Collection::remove(const std::string & id, const bool remove
std::string parsed_document;
store->get(get_seq_id_key(seq_id), parsed_document);
nlohmann::json document = nlohmann::json::parse(parsed_document);
nlohmann::json document;
try {
document = nlohmann::json::parse(parsed_document);
} catch(...) {
return Option<std::string>(500, "Error while parsing stored document.");
}
for(auto & name_field: search_schema) {
// Go through all the field names and find the keys+values so that they can be removed from in-memory index

View File

@ -8,8 +8,15 @@ CollectionManager::CollectionManager() {
}
Collection* CollectionManager::init_collection(const std::string & collection_meta_json) {
nlohmann::json collection_meta = nlohmann::json::parse(collection_meta_json);
Option<Collection*> CollectionManager::init_collection(const std::string & collection_meta_json) {
nlohmann::json collection_meta;
try {
collection_meta = nlohmann::json::parse(collection_meta_json);
} catch(...) {
return Option<Collection*>(500, "Error while parsing collection meta.");
}
std::string this_collection_name = collection_meta[COLLECTION_NAME_KEY].get<std::string>();
std::vector<field> search_fields;
@ -49,7 +56,7 @@ Collection* CollectionManager::init_collection(const std::string & collection_me
collection_sort_fields,
token_ranking_field);
return collection;
return Option<Collection*>(collection);
}
void CollectionManager::add_to_collections(Collection* collection) {
@ -57,7 +64,7 @@ void CollectionManager::add_to_collections(Collection* collection) {
collection_id_names.emplace(collection->get_collection_id(), collection->get_name());
}
void CollectionManager::init(Store *store, const std::string & auth_key) {
Option<bool> CollectionManager::init(Store *store, const std::string & auth_key) {
this->store = store;
this->auth_key = auth_key;
@ -73,7 +80,13 @@ void CollectionManager::init(Store *store, const std::string & auth_key) {
store->scan_fill(Collection::COLLECTION_META_PREFIX, collection_meta_jsons);
for(auto & collection_meta_json: collection_meta_jsons) {
Collection* collection = init_collection(collection_meta_json);
Option<Collection*> collection_op = init_collection(collection_meta_json);
if(!collection_op.ok()) {
return Option<bool>(collection_op.code(), collection_op.error());
}
Collection* collection = collection_op.get();
// Fetch records from the store and re-create memory index
std::vector<std::string> documents;
@ -82,7 +95,17 @@ void CollectionManager::init(Store *store, const std::string & auth_key) {
while(iter->Valid() && iter->key().starts_with(seq_id_prefix)) {
const std::string doc_json_str = iter->value().ToString();
nlohmann::json document = nlohmann::json::parse(doc_json_str);
nlohmann::json document;
try {
document = nlohmann::json::parse(doc_json_str);
} catch(...) {
delete iter;
return Option<bool>(500,
std::string("Error while parsing stored document from collection " + collection->get_name() +
" with key: ") + iter->key().ToString());
}
uint32_t seq_id = collection->doc_id_to_seq_id(document["id"]);
collection->index_in_memory(document, seq_id);
iter->Next();
@ -93,7 +116,7 @@ void CollectionManager::init(Store *store, const std::string & auth_key) {
add_to_collections(collection);
}
std::cout << "Finished restoring all collections from disk." << std::endl;
return Option<bool>(true);
}
void CollectionManager::dispose() {

View File

@ -36,7 +36,14 @@ int main(int argc, char **argv) {
Store store(options.get<std::string>("data-dir"));
CollectionManager & collectionManager = CollectionManager::get_instance();
collectionManager.init(&store, options.get<std::string>("api-auth-key"));
Option<bool> init_op = collectionManager.init(&store, options.get<std::string>("api-auth-key"));
if(init_op.ok()) {
std::cout << "Finished restoring all collections from disk." << std::endl;
} else {
std::cerr << "Failed initializing collections from store..." << std::endl;
std::cerr << init_op.error() << std::endl;
}
server = new HttpServer(
options.get<std::string>("listen-address"),