Do value coercion before removing value from index.

This commit is contained in:
Kishore Nallan 2024-06-11 16:09:03 +05:30
parent bc9e24fb57
commit 37d7ecabe7
4 changed files with 24 additions and 16 deletions

View File

@ -183,7 +183,7 @@ private:
bool& found_highlight,
bool& found_full_highlight) const;
void remove_document(const nlohmann::json & document, const uint32_t seq_id, bool remove_from_store);
void remove_document(nlohmann::json & document, const uint32_t seq_id, bool remove_from_store);
void process_remove_field_for_embedding_fields(const field& del_field, std::vector<field>& garbage_embed_fields);

View File

@ -706,10 +706,10 @@ public:
bool enable_typos_for_alpha_numerical_tokens = true
) const;
void remove_field(uint32_t seq_id, const nlohmann::json& document, const std::string& field_name,
void remove_field(uint32_t seq_id, nlohmann::json& document, const std::string& field_name,
const bool is_update);
Option<uint32_t> remove(const uint32_t seq_id, const nlohmann::json & document,
Option<uint32_t> remove(const uint32_t seq_id, nlohmann::json & document,
const std::vector<field>& del_fields, const bool is_update);
static void validate_and_preprocess(Index *index, std::vector<index_record>& iter_batch,

View File

@ -4460,7 +4460,7 @@ Option<nlohmann::json> Collection::get(const std::string & id) const {
return Option<nlohmann::json>(document);
}
void Collection::remove_document(const nlohmann::json & document, const uint32_t seq_id, bool remove_from_store) {
void Collection::remove_document(nlohmann::json & document, const uint32_t seq_id, bool remove_from_store) {
const std::string& id = document["id"];
{

View File

@ -555,16 +555,17 @@ void Index::validate_and_preprocess(Index *index,
size_t Index::
batch_memory_index(Index *index,
std::vector<index_record>& iter_batch,
const std::string & default_sorting_field,
const tsl::htrie_map<char, field> & actual_search_schema,
const tsl::htrie_map<char, field> & embedding_fields,
const std::string& fallback_field_type,
const std::vector<char>& token_separators,
const std::vector<char>& symbols_to_index,
const bool do_validation, const size_t remote_embedding_batch_size,
const size_t remote_embedding_timeout_ms, const size_t remote_embedding_num_tries, const bool generate_embeddings,
const bool use_addition_fields, const tsl::htrie_map<char, field>& addition_fields) {
std::vector<index_record>& iter_batch,
const std::string & default_sorting_field,
const tsl::htrie_map<char, field> & actual_search_schema,
const tsl::htrie_map<char, field> & embedding_fields,
const std::string& fallback_field_type,
const std::vector<char>& token_separators,
const std::vector<char>& symbols_to_index,
const bool do_validation, const size_t remote_embedding_batch_size,
const size_t remote_embedding_timeout_ms, const size_t remote_embedding_num_tries,
const bool generate_embeddings,
const bool use_addition_fields, const tsl::htrie_map<char, field>& addition_fields) {
const size_t concurrency = 4;
const size_t num_threads = std::min(concurrency, iter_batch.size());
const size_t window_size = (num_threads == 0) ? 0 :
@ -6733,7 +6734,7 @@ void Index::remove_facet_token(const field& search_field, spp::sparse_hash_map<s
}
}
void Index::remove_field(uint32_t seq_id, const nlohmann::json& document, const std::string& field_name,
void Index::remove_field(uint32_t seq_id, nlohmann::json& document, const std::string& field_name,
const bool is_update) {
const auto& search_field_it = search_schema.find(field_name);
if(search_field_it == search_schema.end()) {
@ -6750,6 +6751,13 @@ void Index::remove_field(uint32_t seq_id, const nlohmann::json& document, const
return ;
}
auto coerce_op = validator_t::coerce_element(search_field, document, document[field_name],
"", DIRTY_VALUES::COERCE_OR_REJECT);
if(!coerce_op.ok()) {
LOG(ERROR) << "Bad type for field " << field_name;
return ;
}
// Go through all the field names and find the keys+values so that they can be removed from in-memory index
if(search_field.type == field_types::STRING_ARRAY || search_field.type == field_types::STRING) {
std::vector<std::string> tokens;
@ -6892,7 +6900,7 @@ void Index::remove_field(uint32_t seq_id, const nlohmann::json& document, const
}
}
Option<uint32_t> Index::remove(const uint32_t seq_id, const nlohmann::json & document,
Option<uint32_t> Index::remove(const uint32_t seq_id, nlohmann::json & document,
const std::vector<field>& del_fields, const bool is_update) {
std::unique_lock lock(mutex);