Merge pull request #1402 from ozanarmagan/v0.26-facets

Fix handling of invalid images
This commit is contained in:
Kishore Nallan 2023-11-29 08:46:25 +05:30 committed by GitHub
commit 0b09e5adc3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 5 deletions

View File

@ -10,7 +10,9 @@ embedding_res_t CLIPImageEmbedder::embed(const std::string& encoded_image) {
auto processed_image_op = image_processor_.process_image(encoded_image);
if (!processed_image_op.ok()) {
return embedding_res_t(processed_image_op.code(), processed_image_op.error());
nlohmann::json error_json;
error_json["error"] = processed_image_op.error();
return embedding_res_t(processed_image_op.code(), error_json);
}
auto processed_image = processed_image_op.get();
@ -58,7 +60,9 @@ std::vector<embedding_res_t> CLIPImageEmbedder::batch_embed(const std::vector<st
auto processed_image_op = image_processor_.process_image(input);
if (!processed_image_op.ok()) {
results[i] = embedding_res_t(processed_image_op.code(), processed_image_op.error());
nlohmann::json error_json;
error_json["error"] = processed_image_op.error();
results[i] = embedding_res_t(processed_image_op.code(), error_json);
i++;
continue;
}
@ -67,6 +71,17 @@ std::vector<embedding_res_t> CLIPImageEmbedder::batch_embed(const std::vector<st
i++;
}
// no valid images
if (processed_images.empty()) {
std::vector<embedding_res_t> result_vector(inputs.size());
for (int i = 0; i < inputs.size(); i++) {
result_vector[i] = results[i];
}
return result_vector;
}
// create input tensor
std::vector<int64_t> input_shape = {static_cast<int64_t>(processed_images.size()), 3, 224, 224};
std::vector<const char*> input_names = {"input_ids", "pixel_values", "attention_mask"};

View File

@ -36,8 +36,7 @@ Option<processed_image_t> CLIPImageProcessor::process_image(const std::string& i
LOG(INFO) << "Running image processor";
try {
output_tensors = session_->Run(Ort::RunOptions{nullptr}, input_names.data(), &input_tensor, 1, output_names.data(), output_names.size());
} catch (const std::exception& e) {
LOG(INFO) << "Error while running image processor: " << e.what();
} catch (...) {
return Option<processed_image_t>(400, "Error while processing image");
}

View File

@ -2987,7 +2987,6 @@ TEST_F(CollectionVectorTest, TestImageEmbedding) {
auto coll = collection_create_op.get();
LOG(INFO) << "Adding image to collection";
auto add_op = coll->add(R"({
"name": "dog",
@ -3045,4 +3044,33 @@ TEST_F(CollectionVectorTest, TryAddingMultipleImageFieldToEmbedFrom) {
ASSERT_FALSE(collection_create_op.ok());
ASSERT_EQ(collection_create_op.error(), "Only one field can be used in the `embed.from` property of an embed field when embedding from an image field.");
}
// Verifies that indexing a document whose image field does not contain a
// decodable image is rejected, and that the error reported by the add
// operation is the image-processing error message.
TEST_F(CollectionVectorTest, TestInvalidImage) {
    // Point the embedder at the model directory used by this test before the
    // collection (and with it the CLIP embed field) is created.
    EmbedderManager::set_model_dir("/tmp/typesense_test/models");

    // Schema with an image field and an embedding field that embeds from it.
    auto images_schema = R"({
"name": "Images",
"fields": [
{"name": "name", "type": "string"},
{"name": "image", "type": "image", "store": false},
{"name": "embedding", "type":"float[]", "embed":{"from": ["image"], "model_config": {"model_name": "ts/clip-vit-b-p32"}}}
]
})"_json;

    auto create_op = collectionManager.create_collection(images_schema);
    ASSERT_TRUE(create_op.ok());
    auto images_coll = create_op.get();

    // The "image" value is a plain string rather than encoded image data.
    const std::string bad_image_doc = R"({
"name": "teddy bear",
"image": "invalid"
})"_json.dump();

    auto insert_op = images_coll->add(bad_image_doc);

    // The add must fail and report the image-processing error.
    ASSERT_FALSE(insert_op.ok());
    ASSERT_EQ(insert_op.error(), "Error while processing image");
}