mirror of https://github.com/typesense/typesense.git

Reduce no-op operations during updates to fix perf.

parent 46b0f08d3f
commit 40b5eca7d0
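In brief, as read from the hunks below: Index::remove() and Index::index_in_memory() gain an is_update flag so that document updates skip work that would be a no-op, such as appending a seq_id that is already recorded or removing and re-adding it; sorted_array::remove_value() gets an empty-list guard so callers no longer need a contains() check first; and the Index constructor only allocates an art tree for string fields that are actually flagged for indexing. The DoNotIndexFieldMarkedAsNonIndex test is extended to cover these paths.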
@@ -353,10 +353,11 @@ public:
                      const std::vector<std::string>& group_by_fields,
                      const std::string& default_sorting_field) const;
 
-    Option<uint32_t> remove(const uint32_t seq_id, const nlohmann::json & document);
+    Option<uint32_t> remove(const uint32_t seq_id, const nlohmann::json & document, const bool is_update);
 
     Option<uint32_t> index_in_memory(const nlohmann::json & document, uint32_t seq_id,
-                                     const std::string & default_sorting_field);
+                                     const std::string & default_sorting_field,
+                                     const bool is_update);
 
     static size_t batch_memory_index(Index *index,
                                      std::vector<index_record> & iter_batch,
@@ -357,7 +357,7 @@ Option<uint32_t> Collection::index_in_memory(nlohmann::json &document, uint32_t
     }
 
     Index* index = indices[seq_id % num_memory_shards];
-    index->index_in_memory(document, seq_id, default_sorting_field);
+    index->index_in_memory(document, seq_id, default_sorting_field, is_update);
 
     num_documents += 1;
     return Option<>(200);
@@ -1672,7 +1672,7 @@ void Collection::remove_document(const nlohmann::json & document, const uint32_t
     std::unique_lock lock(mutex);
 
     Index* index = indices[seq_id % num_memory_shards];
-    index->remove(seq_id, document);
+    index->remove(seq_id, document, false);
     num_documents -= 1;
 }
 
@@ -18,9 +18,11 @@ Index::Index(const std::string name, const std::unordered_map<std::string, field
 
     for(const auto & fname_field: search_schema) {
         if(fname_field.second.is_string()) {
-            art_tree *t = new art_tree;
-            art_tree_init(t);
-            search_index.emplace(fname_field.first, t);
+            if(fname_field.second.index) {
+                art_tree *t = new art_tree;
+                art_tree_init(t);
+                search_index.emplace(fname_field.first, t);
+            }
         } else {
             num_tree_t* num_tree = new num_tree_t;
             numerical_index.emplace(fname_field.first, num_tree);
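A minimal sketch of the constructor change above, using hypothetical stand-in types (FieldSpec, SearchTree) rather than the real field/art_tree: a per-field search structure is only allocated when a string field is actually flagged as indexed, which is why the test hunks below expect no "post" entry in the search index.

```cpp
#include <map>
#include <memory>
#include <string>

// Hypothetical stand-ins for the real field / art_tree types.
struct FieldSpec {
    bool is_string;
    bool index;   // false => the field is stored but never searched
};

struct SearchTree {};   // placeholder for an art_tree

int main() {
    const std::map<std::string, FieldSpec> schema = {
        {"company_name", {true, true}},
        {"post",         {true, false}},   // marked as non-indexed
    };

    std::map<std::string, std::unique_ptr<SearchTree>> search_index;

    for(const auto& [name, spec] : schema) {
        if(spec.is_string && spec.index) {
            // only indexed string fields get a search structure
            search_index.emplace(name, std::make_unique<SearchTree>());
        }
    }

    // mirrors the test assertion below: no entry for the non-indexed "post" field
    return search_index.count("post") == 0 ? 0 : 1;
}
```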
@@ -105,7 +107,8 @@ int64_t Index::float_to_in64_t(float f) {
 }
 
 Option<uint32_t> Index::index_in_memory(const nlohmann::json &document, uint32_t seq_id,
-                                        const std::string & default_sorting_field) {
+                                        const std::string & default_sorting_field,
+                                        const bool is_update) {
 
     std::unique_lock lock(mutex);
 
@@ -121,7 +124,10 @@ Option<uint32_t> Index::index_in_memory(const nlohmann::json &document, uint32_t
         points = get_points_from_doc(document, default_sorting_field);
     }
 
-    seq_ids.append(seq_id);
+    if(!is_update) {
+        // for updates, the seq_id will already exist
+        seq_ids.append(seq_id);
+    }
 
     // assumes that validation has already been done
     for(const auto& field_pair: search_schema) {
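A rough illustration of the new guard, with a plain std::vector standing in for the real sorted_array: an update re-indexes field values for a document that already exists, so its seq_id is already present in the id list and appending it again would be wasted work.

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Toy sorted id list standing in for seq_ids (a compressed sorted_array in the real code).
struct SortedIds {
    std::vector<uint32_t> ids;

    void append(uint32_t id) {
        ids.insert(std::lower_bound(ids.begin(), ids.end(), id), id);
    }
};

void index_document(SortedIds& seq_ids, uint32_t seq_id, bool is_update) {
    if(!is_update) {
        // for updates, the seq_id is already in the list; skip the redundant write
        seq_ids.append(seq_id);
    }
    // ... index the field values themselves ...
}

int main() {
    SortedIds seq_ids;
    index_document(seq_ids, 7, /*is_update=*/false);   // fresh insert: id recorded
    index_document(seq_ids, 7, /*is_update=*/true);    // update: id list untouched
    return seq_ids.ids.size() == 1 ? 0 : 1;
}
```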
@@ -450,13 +456,13 @@ size_t Index::batch_memory_index(Index *index, std::vector<index_record> & iter_
                // scrub string fields to reduce delete ops
                get_doc_changes(index_rec.doc, index_rec.old_doc, index_rec.new_doc, index_rec.del_doc);
                index->scrub_reindex_doc(index_rec.doc, index_rec.del_doc, index_rec.old_doc);
-               index->remove(index_rec.seq_id, index_rec.del_doc);
+               index->remove(index_rec.seq_id, index_rec.del_doc, index_rec.is_update);
            }
 
            Option<uint32_t> index_mem_op(0);
 
            try {
-               index_mem_op = index->index_in_memory(index_rec.doc, index_rec.seq_id, default_sorting_field);
+               index_mem_op = index->index_in_memory(index_rec.doc, index_rec.seq_id, default_sorting_field, index_rec.is_update);
            } catch(const std::exception& e) {
                const std::string& error_msg = std::string("Fatal error during indexing: ") + e.what();
                LOG(ERROR) << error_msg << ", document: " << index_rec.doc;
@@ -464,7 +470,7 @@ size_t Index::batch_memory_index(Index *index, std::vector<index_record> & iter_
            }
 
            if(!index_mem_op.ok()) {
-               index->index_in_memory(index_rec.del_doc, index_rec.seq_id, default_sorting_field);
+               index->index_in_memory(index_rec.del_doc, index_rec.seq_id, default_sorting_field, true);
                index_rec.index_failure(index_mem_op.code(), index_mem_op.error());
                continue;
            }
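For context, a hedged sketch of the shape of this update path with made-up helpers (remove_changed, index_doc), not the actual typesense API: only the changed field values are removed, the new document is indexed as an update, and on failure the removed values are put back, again flagged as an update so the shared seq-id list is untouched either way.

```cpp
#include <string>

// Illustrative only: mirrors the shape of the batch update path, not the real API.
struct Doc { std::string body; };

void remove_changed(const Doc& /*del_doc*/, bool /*is_update*/) {
    // drop postings for the fields whose values actually changed
}

bool index_doc(const Doc& doc, bool /*is_update*/) {
    return !doc.body.empty();   // pretend that empty documents fail indexing
}

void batch_update_one(const Doc& del_doc, const Doc& new_doc) {
    remove_changed(del_doc, /*is_update=*/true);        // only the changed fields
    if(!index_doc(new_doc, /*is_update=*/true)) {
        // indexing failed: put the removed values back so the previous version
        // stays searchable
        index_doc(del_doc, /*is_update=*/true);
    }
}

int main() {
    batch_update_one(Doc{"old text"}, Doc{""});   // exercises the rollback branch
    return 0;
}
```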
@@ -2388,7 +2394,7 @@ void Index::remove_and_shift_offset_index(sorted_array& offset_index, const uint
     delete[] new_array;
 }
 
-Option<uint32_t> Index::remove(const uint32_t seq_id, const nlohmann::json & document) {
+Option<uint32_t> Index::remove(const uint32_t seq_id, const nlohmann::json & document, const bool is_update) {
     std::unique_lock lock(mutex);
 
     for(auto it = document.begin(); it != document.end(); ++it) {
@@ -2498,7 +2504,7 @@ Option<uint32_t> Index::remove(const uint32_t seq_id, const nlohmann::json & doc
         }
     }
 
-    if(seq_ids.contains(seq_id)) {
+    if(!is_update) {
        seq_ids.remove_value(seq_id);
    }
 
@@ -253,6 +253,10 @@ void sorted_array::indexOf(const uint32_t *values, const size_t values_len, uint
 }
 
 void sorted_array::remove_value(uint32_t value) {
+    if(length == 0) {
+        return ;
+    }
+
     // A lower bound search returns the first element in the sequence that is >= `value`
     // So, `found_val` will be either equal or greater than `value`
     uint32_t found_val;
@@ -923,6 +923,8 @@ TEST_F(CollectionAllFieldsTest, DoNotIndexFieldMarkedAsNonIndex) {
     auto add_op = coll1->add(doc.dump(), CREATE);
     ASSERT_TRUE(add_op.ok());
 
+    ASSERT_EQ(0, coll1->_get_indexes()[0]->_get_search_index().count("post"));
+
     auto res_op = coll1->search("Amazon", {"description_txt"}, "", {}, sort_fields, 0, 10, 1, FREQUENCY, false);
     ASSERT_FALSE(res_op.ok());
     ASSERT_EQ("Could not find a field named `description_txt` in the schema.", res_op.error());
@@ -936,6 +938,8 @@ TEST_F(CollectionAllFieldsTest, DoNotIndexFieldMarkedAsNonIndex) {
     auto update_op = coll1->add(doc.dump(), UPDATE, "0");
     ASSERT_TRUE(add_op.ok());
 
+    ASSERT_EQ(0, coll1->_get_indexes()[0]->_get_search_index().count("post"));
+
     auto res = coll1->search("Amazon", {"company_name"}, "", {}, sort_fields, 0, 10, 1, FREQUENCY, false).get();
     ASSERT_EQ("Some post updated.", res["hits"][0]["document"]["post"].get<std::string>());
 
@@ -943,6 +947,11 @@ TEST_F(CollectionAllFieldsTest, DoNotIndexFieldMarkedAsNonIndex) {
     auto del_op = coll1->remove("0");
     ASSERT_TRUE(del_op.ok());
 
+    // facet search should also be disabled
+    auto fs_op = coll1->search("Amazon", {"company_name"}, "", {"description_txt"}, sort_fields, 0, 10, 1, FREQUENCY, false);
+    ASSERT_FALSE(fs_op.ok());
+    ASSERT_EQ("Could not find a facet field named `description_txt` in the schema.", fs_op.error());
+
     fields = {field("company_name", field_types::STRING, false),
               field("num_employees", field_types::INT32, false),
               field("post", field_types::STRING, false, false, false),