mirror of
https://github.com/typesense/typesense.git
synced 2025-05-21 22:33:27 +08:00
Fix tokenizing XLM-RoBERTa models with 3 inputs and altering embedding fields
This commit is contained in:
parent
3b157f6c61
commit
29613ad054
@ -4300,6 +4300,27 @@ Option<bool> Collection::validate_alter_payload(nlohmann::json& schema_changes,
|
||||
|
||||
auto validation_op = field::validate_and_init_embed_fields(embed_json_field_indices, search_schema,
|
||||
schema_changes["fields"], diff_fields);
|
||||
|
||||
for(auto index : embed_json_field_indices) {
|
||||
auto& field = diff_fields[index.second];
|
||||
auto is_reindex = (delete_field_names.count(field.name) != 0);
|
||||
if(is_reindex) {
|
||||
for(auto& reindex_field: reindex_fields) {
|
||||
if(reindex_field.name == field.name) {
|
||||
reindex_field.num_dim = field.num_dim;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for(auto& add_field: addition_fields) {
|
||||
if(add_field.name == field.name) {
|
||||
add_field.num_dim = field.num_dim;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(!validation_op.ok()) {
|
||||
return validation_op;
|
||||
}
|
||||
|
@ -117,6 +117,11 @@ embedding_res_t TextEmbedder::Embed(const std::string& text, const size_t remote
|
||||
input_shapes.push_back({1, static_cast<int64_t>(encoded_input.input_ids.size())});
|
||||
input_shapes.push_back({1, static_cast<int64_t>(encoded_input.attention_mask.size())});
|
||||
if(session_->GetInputCount() == 3) {
|
||||
// Edge case: XLM-RoBERTa encodings carry no token_type_ids, but when the model declares them as a third input we must supply them, zero-filled to the same length as input_ids.
|
||||
if(encoded_input.token_type_ids.size() == 0) {
|
||||
encoded_input.token_type_ids.resize(encoded_input.input_ids.size(), 0);
|
||||
}
|
||||
|
||||
input_shapes.push_back({1, static_cast<int64_t>(encoded_input.token_type_ids.size())});
|
||||
}
|
||||
input_tensors.push_back(Ort::Value::CreateTensor<int64_t>(memory_info, encoded_input.input_ids.data(), encoded_input.input_ids.size(), input_shapes[0].data(), input_shapes[0].size()));
|
||||
|
Loading…
x
Reference in New Issue
Block a user