mirror of
https://github.com/typesense/typesense.git
synced 2025-05-22 06:40:30 +08:00
Add geo_range_index
.
This commit is contained in:
parent
257e1189fe
commit
b45e7c07d4
@ -305,7 +305,9 @@ private:
|
||||
|
||||
spp::sparse_hash_map<std::string, NumericTrie*> range_index;
|
||||
|
||||
spp::sparse_hash_map<std::string, spp::sparse_hash_map<std::string, std::vector<uint32_t>>*> geopoint_index;
|
||||
spp::sparse_hash_map<std::string, NumericTrie*> geo_range_index;
|
||||
|
||||
// spp::sparse_hash_map<std::string, spp::sparse_hash_map<std::string, std::vector<uint32_t>>*> geopoint_index;
|
||||
|
||||
// geo_array_field => (seq_id => values) used for exact filtering of geo array records
|
||||
spp::sparse_hash_map<std::string, spp::sparse_hash_map<uint32_t, int64_t*>*> geo_array_index;
|
||||
|
@ -14,6 +14,8 @@ class NumericTrie {
|
||||
|
||||
void insert_helper(const int64_t& value, const uint32_t& seq_id, char& level, const char& max_level);
|
||||
|
||||
void insert_geopoint_helper(const uint64_t& cell_id, const uint32_t& seq_id, char& level, const char& max_level);
|
||||
|
||||
void search_range_helper(const int64_t& low,const int64_t& high, const char& max_level,
|
||||
std::vector<Node*>& matches);
|
||||
|
||||
@ -35,7 +37,13 @@ class NumericTrie {
|
||||
delete [] children;
|
||||
}
|
||||
|
||||
void insert(const int64_t& value, const uint32_t& seq_id, const char& max_level);
|
||||
void insert(const int64_t& cell_id, const uint32_t& seq_id, const char& max_level);
|
||||
|
||||
void insert_geopoint(const uint64_t& cell_id, const uint32_t& seq_id, const char& max_level);
|
||||
|
||||
void search_geopoint(const uint64_t& cell_id, const char& max_index_level, uint32_t*& ids, uint32_t& ids_length);
|
||||
|
||||
void delete_geopoint(const uint64_t& cell_id, uint32_t id, const char& max_level);
|
||||
|
||||
void get_all_ids(uint32_t*& ids, uint32_t& ids_length);
|
||||
|
||||
@ -110,6 +118,12 @@ public:
|
||||
|
||||
void insert(const int64_t& value, const uint32_t& seq_id);
|
||||
|
||||
void insert_geopoint(const uint64_t& cell_id, const uint32_t& seq_id);
|
||||
|
||||
void search_geopoint(const uint64_t& cell_id, uint32_t*& ids, uint32_t& ids_length);
|
||||
|
||||
void delete_geopoint(const uint64_t& cell_id, uint32_t id);
|
||||
|
||||
void search_range(const int64_t& low, const bool& low_inclusive,
|
||||
const int64_t& high, const bool& high_inclusive,
|
||||
uint32_t*& ids, uint32_t& ids_length);
|
||||
|
@ -892,13 +892,21 @@ void filter_result_iterator_t::init() {
|
||||
S2RegionTermIndexer::Options options;
|
||||
options.set_index_contains_points_only(true);
|
||||
S2RegionTermIndexer indexer(options);
|
||||
auto const& geo_range_index = index->geo_range_index.at(a_filter.field_name);
|
||||
|
||||
for (const auto& term : indexer.GetQueryTerms(*query_region, "")) {
|
||||
auto geo_index = index->geopoint_index.at(a_filter.field_name);
|
||||
const auto& ids_it = geo_index->find(term);
|
||||
if(ids_it != geo_index->end()) {
|
||||
geo_result_ids.insert(geo_result_ids.end(), ids_it->second.begin(), ids_it->second.end());
|
||||
auto cell = S2CellId::FromToken(term);
|
||||
uint32_t* geo_ids = nullptr;
|
||||
uint32_t geo_ids_length = 0;
|
||||
|
||||
geo_range_index->search_geopoint(cell.id(), geo_ids, geo_ids_length);
|
||||
|
||||
geo_result_ids.reserve(geo_result_ids.size() + geo_ids_length);
|
||||
for (uint32_t i = 0; i < geo_ids_length; i++) {
|
||||
geo_result_ids.push_back(geo_ids[i]);
|
||||
}
|
||||
|
||||
delete [] geo_ids;
|
||||
}
|
||||
|
||||
gfx::timsort(geo_result_ids.begin(), geo_result_ids.end());
|
||||
|
@ -78,8 +78,7 @@ Index::Index(const std::string& name, const uint32_t collection_id, const Store*
|
||||
art_tree_init(t);
|
||||
search_index.emplace(a_field.name, t);
|
||||
} else if(a_field.is_geopoint()) {
|
||||
auto field_geo_index = new spp::sparse_hash_map<std::string, std::vector<uint32_t>>();
|
||||
geopoint_index.emplace(a_field.name, field_geo_index);
|
||||
geo_range_index.emplace(a_field.name, new NumericTrie(64));
|
||||
|
||||
if(!a_field.is_single_geopoint()) {
|
||||
spp::sparse_hash_map<uint32_t, int64_t*> * doc_to_geos = new spp::sparse_hash_map<uint32_t, int64_t*>();
|
||||
@ -141,12 +140,12 @@ Index::~Index() {
|
||||
|
||||
search_index.clear();
|
||||
|
||||
for(auto & name_index: geopoint_index) {
|
||||
for(auto & name_index: geo_range_index) {
|
||||
delete name_index.second;
|
||||
name_index.second = nullptr;
|
||||
}
|
||||
|
||||
geopoint_index.clear();
|
||||
geo_range_index.clear();
|
||||
|
||||
for(auto& name_index: geo_array_index) {
|
||||
for(auto& kv: *name_index.second) {
|
||||
@ -811,10 +810,10 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
|
||||
num_tree->insert(value, seq_id);
|
||||
});
|
||||
} else if(afield.type == field_types::GEOPOINT || afield.type == field_types::GEOPOINT_ARRAY) {
|
||||
auto geo_index = geopoint_index.at(afield.name);
|
||||
auto geopoint_range_index = geo_range_index.at(afield.name);
|
||||
|
||||
iterate_and_index_numerical_field(iter_batch, afield,
|
||||
[&afield, &geo_array_index=geo_array_index, geo_index](const index_record& record, uint32_t seq_id) {
|
||||
[&afield, &geo_array_index=geo_array_index, geopoint_range_index](const index_record& record, uint32_t seq_id) {
|
||||
// nested geopoint value inside an array of object will be a simple array so must be treated as geopoint
|
||||
bool nested_obj_arr_geopoint = (afield.nested && afield.type == field_types::GEOPOINT_ARRAY &&
|
||||
!record.doc[afield.name].empty() && record.doc[afield.name][0].is_number());
|
||||
@ -828,9 +827,8 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
|
||||
S2RegionTermIndexer indexer(options);
|
||||
S2Point point = S2LatLng::FromDegrees(latlongs[li], latlongs[li+1]).ToPoint();
|
||||
|
||||
for(const auto& term: indexer.GetIndexTerms(point, "")) {
|
||||
(*geo_index)[term].push_back(seq_id);
|
||||
}
|
||||
auto cell = S2CellId(point);
|
||||
geopoint_range_index->insert_geopoint(cell.id(), seq_id);
|
||||
}
|
||||
|
||||
if(nested_obj_arr_geopoint) {
|
||||
@ -858,9 +856,9 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
|
||||
for(size_t li = 0; li < latlongs.size(); li++) {
|
||||
auto& latlong = latlongs[li];
|
||||
S2Point point = S2LatLng::FromDegrees(latlong[0], latlong[1]).ToPoint();
|
||||
for(const auto& term: indexer.GetIndexTerms(point, "")) {
|
||||
(*geo_index)[term].push_back(seq_id);
|
||||
}
|
||||
|
||||
auto cell = S2CellId(point);
|
||||
geopoint_range_index->insert_geopoint(cell.id(), seq_id);
|
||||
|
||||
int64_t packed_latlong = GeoPoint::pack_lat_lng(latlong[0], latlong[1]);
|
||||
packed_latlongs[li + 1] = packed_latlong;
|
||||
@ -1590,7 +1588,7 @@ void Index::numeric_not_equals_filter(num_tree_t* const num_tree,
|
||||
bool Index::field_is_indexed(const std::string& field_name) const {
|
||||
return search_index.count(field_name) != 0 ||
|
||||
numerical_index.count(field_name) != 0 ||
|
||||
geopoint_index.count(field_name) != 0;
|
||||
geo_range_index.count(field_name) != 0;
|
||||
}
|
||||
|
||||
void Index::aproximate_numerical_match(num_tree_t* const num_tree,
|
||||
@ -5468,7 +5466,7 @@ void Index::remove_field(uint32_t seq_id, const nlohmann::json& document, const
|
||||
}
|
||||
}
|
||||
} else if(search_field.is_geopoint()) {
|
||||
auto geo_index = geopoint_index[field_name];
|
||||
auto geopoint_range_index = geo_range_index[field_name];
|
||||
S2RegionTermIndexer::Options options;
|
||||
options.set_index_contains_points_only(true);
|
||||
S2RegionTermIndexer indexer(options);
|
||||
@ -5479,17 +5477,8 @@ void Index::remove_field(uint32_t seq_id, const nlohmann::json& document, const
|
||||
|
||||
for(const std::vector<double>& latlong: latlongs) {
|
||||
S2Point point = S2LatLng::FromDegrees(latlong[0], latlong[1]).ToPoint();
|
||||
for(const auto& term: indexer.GetIndexTerms(point, "")) {
|
||||
auto term_it = geo_index->find(term);
|
||||
if(term_it == geo_index->end()) {
|
||||
continue;
|
||||
}
|
||||
std::vector<uint32_t>& ids = term_it->second;
|
||||
ids.erase(std::remove(ids.begin(), ids.end(), seq_id), ids.end());
|
||||
if(ids.empty()) {
|
||||
geo_index->erase(term);
|
||||
}
|
||||
}
|
||||
auto cell = S2CellId(point);
|
||||
geopoint_range_index->delete_geopoint(cell.id(), seq_id);
|
||||
}
|
||||
|
||||
if(!search_field.is_single_geopoint()) {
|
||||
@ -5641,8 +5630,7 @@ void Index::refresh_schemas(const std::vector<field>& new_fields, const std::vec
|
||||
art_tree_init(t);
|
||||
search_index.emplace(new_field.name, t);
|
||||
} else if(new_field.is_geopoint()) {
|
||||
auto field_geo_index = new spp::sparse_hash_map<std::string, std::vector<uint32_t>>();
|
||||
geopoint_index.emplace(new_field.name, field_geo_index);
|
||||
geo_range_index.emplace(new_field.name, new NumericTrie(64));
|
||||
if(!new_field.is_single_geopoint()) {
|
||||
auto geo_array_map = new spp::sparse_hash_map<uint32_t, int64_t*>();
|
||||
geo_array_index.emplace(new_field.name, geo_array_map);
|
||||
@ -5692,8 +5680,8 @@ void Index::refresh_schemas(const std::vector<field>& new_fields, const std::vec
|
||||
delete search_index[del_field.name];
|
||||
search_index.erase(del_field.name);
|
||||
} else if(del_field.is_geopoint()) {
|
||||
delete geopoint_index[del_field.name];
|
||||
geopoint_index.erase(del_field.name);
|
||||
delete geo_range_index[del_field.name];
|
||||
geo_range_index.erase(del_field.name);
|
||||
|
||||
if(!del_field.is_single_geopoint()) {
|
||||
spp::sparse_hash_map<uint32_t, int64_t*>* geo_array_map = geo_array_index[del_field.name];
|
||||
|
@ -18,6 +18,30 @@ void NumericTrie::insert(const int64_t& value, const uint32_t& seq_id) {
|
||||
}
|
||||
}
|
||||
|
||||
void NumericTrie::insert_geopoint(const uint64_t& cell_id, const uint32_t& seq_id) {
|
||||
if (positive_trie == nullptr) {
|
||||
positive_trie = new NumericTrie::Node();
|
||||
}
|
||||
|
||||
positive_trie->insert_geopoint(cell_id, seq_id, max_level);
|
||||
}
|
||||
|
||||
// Looks up the ids stored for `cell_id` and hands them back through `ids` / `ids_length`.
// When nothing has been inserted yet (no root node), both output arguments are left untouched.
void NumericTrie::search_geopoint(const uint64_t& cell_id, uint32_t*& ids, uint32_t& ids_length) {
    if (positive_trie != nullptr) {
        positive_trie->search_geopoint(cell_id, max_level, ids, ids_length);
    }
}
|
||||
|
||||
// Removes `id` from the path addressed by `cell_id`.
// A trie without a root node has nothing to delete, so the call is a no-op in that case.
void NumericTrie::delete_geopoint(const uint64_t& cell_id, uint32_t id) {
    if (positive_trie != nullptr) {
        positive_trie->delete_geopoint(cell_id, id, max_level);
    }
}
|
||||
|
||||
void NumericTrie::search_range(const int64_t& low, const bool& low_inclusive,
|
||||
const int64_t& high, const bool& high_inclusive,
|
||||
uint32_t*& ids, uint32_t& ids_length) {
|
||||
@ -369,9 +393,14 @@ NumericTrie::iterator_t NumericTrie::search_equal_to(const int64_t& value) {
|
||||
return NumericTrie::iterator_t(matches);
|
||||
}
|
||||
|
||||
void NumericTrie::Node::insert(const int64_t& value, const uint32_t& seq_id, const char& max_level) {
|
||||
void NumericTrie::Node::insert(const int64_t& cell_id, const uint32_t& seq_id, const char& max_level) {
|
||||
char level = 0;
|
||||
return insert_helper(value, seq_id, level, max_level);
|
||||
return insert_helper(cell_id, seq_id, level, max_level);
|
||||
}
|
||||
|
||||
void NumericTrie::Node::insert_geopoint(const uint64_t& cell_id, const uint32_t& seq_id, const char& max_level) {
|
||||
char level = 0;
|
||||
return insert_geopoint_helper(cell_id, seq_id, level, max_level);
|
||||
}
|
||||
|
||||
inline int get_index(const int64_t& value, const char& level, const char& max_level) {
|
||||
@ -385,6 +414,10 @@ inline int get_index(const int64_t& value, const char& level, const char& max_le
|
||||
return (value >> (8 * (max_level - level))) & 0xFF;
|
||||
}
|
||||
|
||||
// Returns the byte of `cell_id` that selects the child slot at `level`.
// Level `max_level` maps to the least significant byte; each level above it
// maps to the next more significant byte.
inline int get_geopoint_index(const uint64_t& cell_id, const char& level, const char& max_level) {
    const int shift_bits = 8 * (max_level - level);
    const uint64_t selected_byte = (cell_id >> shift_bits) & 0xFF;
    return static_cast<int>(selected_byte);
}
|
||||
|
||||
void NumericTrie::Node::insert_helper(const int64_t& value, const uint32_t& seq_id, char& level, const char& max_level) {
|
||||
if (level > max_level) {
|
||||
return;
|
||||
@ -409,6 +442,85 @@ void NumericTrie::Node::insert_helper(const int64_t& value, const uint32_t& seq_
|
||||
}
|
||||
}
|
||||
|
||||
void NumericTrie::Node::insert_geopoint_helper(const uint64_t& cell_id, const uint32_t& seq_id, char& level,
|
||||
const char& max_level) {
|
||||
if (level > max_level) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Root node contains all the sequence ids present in the tree.
|
||||
if (!seq_ids.contains(seq_id)) {
|
||||
seq_ids.append(seq_id);
|
||||
}
|
||||
|
||||
if (++level <= max_level) {
|
||||
if (children == nullptr) {
|
||||
children = new NumericTrie::Node* [EXPANSE]{nullptr};
|
||||
}
|
||||
|
||||
auto index = get_geopoint_index(cell_id, level, max_level);
|
||||
if (children[index] == nullptr) {
|
||||
children[index] = new NumericTrie::Node();
|
||||
}
|
||||
|
||||
return children[index]->insert_geopoint_helper(cell_id, seq_id, level, max_level);
|
||||
}
|
||||
}
|
||||
|
||||
// Computes how many leading bytes of `cell_id` are significant for a prefix search.
// Trailing zero bytes are skipped: for cell id 0x47E66C3000000000 with max_level 8 the
// lowest four bytes are 0, so the result is 4. A cell id of 0 yields 0.
char get_max_search_level(const uint64_t& cell_id, const char& max_level) {
    char search_level = max_level;

    // Walk from the least significant byte upwards while the bytes are zero.
    for (uint64_t byte_mask = 0xff; (cell_id & byte_mask) == 0; byte_mask <<= 8) {
        if (--search_level <= 0) {
            break;
        }
    }

    return search_level;
}
|
||||
|
||||
void NumericTrie::Node::search_geopoint(const uint64_t& cell_id, const char& max_index_level,
|
||||
uint32_t*& ids, uint32_t& ids_length) {
|
||||
char level = 1;
|
||||
Node* root = this;
|
||||
auto index = get_geopoint_index(cell_id, level, max_index_level);
|
||||
auto max_search_level = get_max_search_level(cell_id, max_index_level);
|
||||
|
||||
while (level < max_search_level) {
|
||||
if (root->children == nullptr || root->children[index] == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
root = root->children[index];
|
||||
index = get_geopoint_index(cell_id, ++level, max_index_level);
|
||||
}
|
||||
|
||||
root->get_all_ids(ids, ids_length);
|
||||
}
|
||||
|
||||
void NumericTrie::Node::delete_geopoint(const uint64_t& cell_id, uint32_t id, const char& max_level) {
|
||||
char level = 1;
|
||||
Node* root = this;
|
||||
auto index = get_geopoint_index(cell_id, level, max_level);
|
||||
|
||||
while (level < max_level) {
|
||||
root->seq_ids.remove_value(id);
|
||||
|
||||
if (root->children == nullptr || root->children[index] == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
root = root->children[index];
|
||||
index = get_geopoint_index(cell_id, ++level, max_level);
|
||||
}
|
||||
|
||||
if (root->children != nullptr || root->children[index] != nullptr) {
|
||||
delete root->children[index];
|
||||
root->children[index] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void NumericTrie::Node::get_all_ids(uint32_t*& ids, uint32_t& ids_length) {
|
||||
ids = seq_ids.uncompress();
|
||||
ids_length = seq_ids.getLength();
|
||||
|
Loading…
x
Reference in New Issue
Block a user