Remove sort schema bookkeeping data structure.

This commit is contained in:
Kishore Nallan 2022-02-28 17:15:51 +05:30
parent c140d42f80
commit 69d98d2751
5 changed files with 24 additions and 38 deletions

View File

@ -174,8 +174,6 @@ private:
std::unordered_map<std::string, field> search_schema;
std::unordered_map<std::string, field> sort_schema;
std::map<std::string, override_t> overrides;
spp::sparse_hash_map<std::string, synonym_t> synonym_definitions;

View File

@ -430,8 +430,6 @@ private:
std::unordered_map<std::string, field> search_schema;
std::unordered_map<std::string, field> sort_schema;
spp::sparse_hash_map<std::string, art_tree*> search_index;
spp::sparse_hash_map<std::string, num_tree_t*> numerical_index;
@ -633,7 +631,6 @@ public:
const Store* store,
ThreadPool* thread_pool,
const std::unordered_map<std::string, field>& search_schema,
const std::unordered_map<std::string, field>& sort_schema,
const std::vector<char>& symbols_to_index, const std::vector<char>& token_separators);
~Index();

View File

@ -517,7 +517,7 @@ Option<bool> Collection::validate_and_standardize_sort_fields(const std::vector<
sort_field_std.text_match_buckets = std::stoll(match_parts[1]);
} else {
if(sort_schema.count(actual_field_name) == 0) {
if(search_schema.count(actual_field_name) == 0 || !search_schema.at(actual_field_name).sort) {
std::string error = "Could not find a field named `" + actual_field_name + "` in the schema for sorting.";
return Option<bool>(404, error);
}
@ -611,7 +611,8 @@ Option<bool> Collection::validate_and_standardize_sort_fields(const std::vector<
}
}
if(sort_field_std.name != sort_field_const::text_match && sort_schema.count(sort_field_std.name) == 0) {
if(sort_field_std.name != sort_field_const::text_match && (search_schema.count(sort_field_std.name) == 0 ||
!search_schema.at(sort_field_std.name).sort)) {
std::string error = "Could not find a field named `" + sort_field_std.name + "` in the schema for sorting.";
return Option<bool>(404, error);
}
@ -2295,8 +2296,10 @@ std::vector<field> Collection::get_sort_fields() {
std::shared_lock lock(mutex);
std::vector<field> sort_fields_copy;
for(auto it = sort_schema.begin(); it != sort_schema.end(); ++it) {
sort_fields_copy.push_back(it->second);
for(auto it = search_schema.begin(); it != search_schema.end(); ++it) {
if(it->second.sort) {
sort_fields_copy.push_back(it->second);
}
}
return sort_fields_copy;
@ -2697,11 +2700,6 @@ Option<bool> Collection::check_and_update_schema(nlohmann::json& document, const
}
for(auto& new_field: new_fields) {
if (new_field.is_num_sort_field()) {
// only numerical fields are added to sort index in dynamic type detection
sort_schema.emplace(new_field.name, new_field);
}
search_schema.emplace(new_field.name, new_field);
fields.emplace_back(new_field);
}
@ -2757,17 +2755,13 @@ Index* Collection::init_index() {
}
search_schema.emplace(field.name, field);
if(field.is_sortable()) {
sort_schema.emplace(field.name, field);
}
}
return new Index(name+std::to_string(0),
collection_id,
store,
CollectionManager::get_instance().get_thread_pool(),
search_schema, sort_schema,
search_schema,
symbols_to_index, token_separators);
}

View File

@ -40,10 +40,9 @@ spp::sparse_hash_map<uint32_t, int64_t> Index::str_sentinel_value;
Index::Index(const std::string& name, const uint32_t collection_id, const Store* store, ThreadPool* thread_pool,
const std::unordered_map<std::string, field> & search_schema,
const std::unordered_map<std::string, field>& sort_schema,
const std::vector<char>& symbols_to_index, const std::vector<char>& token_separators):
name(name), collection_id(collection_id), store(store), thread_pool(thread_pool),
search_schema(search_schema), sort_schema(sort_schema),
search_schema(search_schema),
seq_ids(new id_list_t(256)), symbols_to_index(symbols_to_index), token_separators(token_separators) {
for(const auto & fname_field: search_schema) {
@ -66,6 +65,16 @@ Index::Index(const std::string& name, const uint32_t collection_id, const Store*
numerical_index.emplace(fname_field.first, num_tree);
}
if(fname_field.second.sort) {
if(fname_field.second.type == field_types::STRING) {
adi_tree_t* tree = new adi_tree_t();
str_sort_index.emplace(fname_field.first, tree);
} else if(fname_field.second.type != field_types::GEOPOINT_ARRAY) {
spp::sparse_hash_map<uint32_t, int64_t> * doc_to_score = new spp::sparse_hash_map<uint32_t, int64_t>();
sort_index.emplace(fname_field.first, doc_to_score);
}
}
if(fname_field.second.facet) {
array_mapped_facet_t facet_array;
for(size_t i = 0; i < ARRAY_FACET_DIM; i++) {
@ -93,16 +102,6 @@ Index::Index(const std::string& name, const uint32_t collection_id, const Store*
}
}
for(const auto & pair: sort_schema) {
if(pair.second.type == field_types::STRING) {
adi_tree_t* tree = new adi_tree_t();
str_sort_index.emplace(pair.first, tree);
} else if(pair.second.type != field_types::GEOPOINT_ARRAY) {
spp::sparse_hash_map<uint32_t, int64_t> * doc_to_score = new spp::sparse_hash_map<uint32_t, int64_t>();
sort_index.emplace(pair.first, doc_to_score);
}
}
num_documents = 0;
}
@ -3186,16 +3185,16 @@ void Index::populate_sort_mapping(int* sort_order, std::vector<size_t>& geopoint
field_values[i] = &text_match_sentinel_value;
} else if (sort_fields_std[i].name == sort_field_const::seq_id) {
field_values[i] = &seq_id_sentinel_value;
} else if (sort_schema.count(sort_fields_std[i].name) != 0) {
if (sort_schema.at(sort_fields_std[i].name).type == field_types::GEOPOINT_ARRAY) {
} else if (search_schema.count(sort_fields_std[i].name) != 0 && search_schema.at(sort_fields_std[i].name).sort) {
if (search_schema.at(sort_fields_std[i].name).type == field_types::GEOPOINT_ARRAY) {
geopoint_indices.push_back(i);
field_values[i] = nullptr; // GEOPOINT_ARRAY uses a multi-valued index
} else if(sort_schema.at(sort_fields_std[i].name).type == field_types::STRING) {
} else if(search_schema.at(sort_fields_std[i].name).type == field_types::STRING) {
field_values[i] = &str_sentinel_value;
} else {
field_values[i] = sort_index.at(sort_fields_std[i].name);
if (sort_schema.at(sort_fields_std[i].name).is_geopoint()) {
if (search_schema.at(sort_fields_std[i].name).is_geopoint()) {
geopoint_indices.push_back(i);
}
}
@ -3910,8 +3909,6 @@ void Index::refresh_schemas(const std::vector<field>& new_fields) {
search_schema.emplace(new_field.name, new_field);
if(new_field.is_sortable()) {
sort_schema.emplace(new_field.name, new_field);
if(new_field.is_num_sortable()) {
spp::sparse_hash_map<uint32_t, int64_t> * doc_to_score = new spp::sparse_hash_map<uint32_t, int64_t>();
sort_index.emplace(new_field.name, doc_to_score);

View File

@ -12,7 +12,7 @@ TEST(IndexTest, ScrubReindexDoc) {
ThreadPool pool(4);
Index index("index", 1, nullptr, &pool, search_schema, {}, {}, {});
Index index("index", 1, nullptr, &pool, search_schema, {}, {});
nlohmann::json old_doc;
old_doc["id"] = "1";
old_doc["title"] = "One more thing.";