From 00061d61d5922c41ebd5c97f94bf9a7c0e701de0 Mon Sep 17 00:00:00 2001 From: Harpreet Sangar Date: Wed, 14 Jun 2023 11:34:19 +0530 Subject: [PATCH] Optimize geo filtering. --- include/numeric_range_trie_test.h | 7 +++++-- src/filter_result_iterator.cpp | 16 +++------------- src/numeric_range_trie.cpp | 31 ++++++++++++++++++++++++++----- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/include/numeric_range_trie_test.h b/include/numeric_range_trie_test.h index ed695a70..3b6f8f68 100644 --- a/include/numeric_range_trie_test.h +++ b/include/numeric_range_trie_test.h @@ -16,6 +16,8 @@ class NumericTrie { void insert_geopoint_helper(const uint64_t& cell_id, const uint32_t& seq_id, char& level, const char& max_level); + void search_geopoints_helper(const uint64_t& cell_id, const char& max_index_level, std::set& matches); + void search_range_helper(const int64_t& low,const int64_t& high, const char& max_level, std::vector& matches); @@ -41,7 +43,8 @@ class NumericTrie { void insert_geopoint(const uint64_t& cell_id, const uint32_t& seq_id, const char& max_level); - void search_geopoint(const uint64_t& cell_id, const char& max_index_level, uint32_t*& ids, uint32_t& ids_length); + void search_geopoints(const std::vector& cell_ids, const char& max_index_level, + std::vector& geo_result_ids); void delete_geopoint(const uint64_t& cell_id, uint32_t id, const char& max_level); @@ -120,7 +123,7 @@ public: void insert_geopoint(const uint64_t& cell_id, const uint32_t& seq_id); - void search_geopoint(const uint64_t& cell_id, uint32_t*& ids, uint32_t& ids_length); + void search_geopoints(const std::vector& cell_ids, std::vector& geo_result_ids); void delete_geopoint(const uint64_t& cell_id, uint32_t id); diff --git a/src/filter_result_iterator.cpp b/src/filter_result_iterator.cpp index 5fb432f2..4794e030 100644 --- a/src/filter_result_iterator.cpp +++ b/src/filter_result_iterator.cpp @@ -894,23 +894,13 @@ void filter_result_iterator_t::init() { S2RegionTermIndexer indexer(options); auto const& geo_range_index = index->geo_range_index.at(a_filter.field_name); + std::vector cell_ids; for (const auto& term : indexer.GetQueryTerms(*query_region, "")) { auto cell = S2CellId::FromToken(term); - uint32_t* geo_ids = nullptr; - uint32_t geo_ids_length = 0; - - geo_range_index->search_geopoint(cell.id(), geo_ids, geo_ids_length); - - geo_result_ids.reserve(geo_result_ids.size() + geo_ids_length); - for (uint32_t i = 0; i < geo_ids_length; i++) { - geo_result_ids.push_back(geo_ids[i]); - } - - delete [] geo_ids; + cell_ids.push_back(cell.id()); } - gfx::timsort(geo_result_ids.begin(), geo_result_ids.end()); - geo_result_ids.erase(std::unique( geo_result_ids.begin(), geo_result_ids.end() ), geo_result_ids.end()); + geo_range_index->search_geopoints(cell_ids, geo_result_ids); // Skip exact filtering step if query radius is greater than the threshold. if (fi < a_filter.params.size() && diff --git a/src/numeric_range_trie.cpp b/src/numeric_range_trie.cpp index 86090304..71970894 100644 --- a/src/numeric_range_trie.cpp +++ b/src/numeric_range_trie.cpp @@ -1,4 +1,5 @@ #include +#include #include "numeric_range_trie_test.h" #include "array_utils.h" @@ -26,12 +27,12 @@ void NumericTrie::insert_geopoint(const uint64_t& cell_id, const uint32_t& seq_i positive_trie->insert_geopoint(cell_id, seq_id, max_level); } -void NumericTrie::search_geopoint(const uint64_t& cell_id, uint32_t*& ids, uint32_t& ids_length) { +void NumericTrie::search_geopoints(const std::vector& cell_ids, std::vector& geo_result_ids) { if (positive_trie == nullptr) { return; } - positive_trie->search_geopoint(cell_id, max_level, ids, ids_length); + positive_trie->search_geopoints(cell_ids, max_level, geo_result_ids); } void NumericTrie::delete_geopoint(const uint64_t& cell_id, uint32_t id) { @@ -480,8 +481,8 @@ char get_max_search_level(const uint64_t& cell_id, const char& max_level) { return i; } -void NumericTrie::Node::search_geopoint(const uint64_t& cell_id, const char& max_index_level, - uint32_t*& ids, uint32_t& ids_length) { +void NumericTrie::Node::search_geopoints_helper(const uint64_t& cell_id, const char& max_index_level, + std::set& matches) { char level = 1; Node* root = this; auto index = get_geopoint_index(cell_id, level, max_index_level); @@ -496,7 +497,27 @@ void NumericTrie::Node::search_geopoint(const uint64_t& cell_id, const char& max index = get_geopoint_index(cell_id, ++level, max_index_level); } - root->get_all_ids(ids, ids_length); + matches.insert(root); +} + +void NumericTrie::Node::search_geopoints(const std::vector& cell_ids, const char& max_index_level, + std::vector& geo_result_ids) { + std::set matches; + for (const auto &cell_id: cell_ids) { + search_geopoints_helper(cell_id, max_index_level, matches); + } + + for (auto const& match: matches) { + auto const& m_seq_ids = match->seq_ids.uncompress(); + for (uint32_t i = 0; i < match->seq_ids.getLength(); i++) { + geo_result_ids.push_back(m_seq_ids[i]); + } + + delete [] m_seq_ids; + } + + gfx::timsort(geo_result_ids.begin(), geo_result_ids.end()); + geo_result_ids.erase(unique(geo_result_ids.begin(), geo_result_ids.end()), geo_result_ids.end()); } void NumericTrie::Node::delete_geopoint(const uint64_t& cell_id, uint32_t id, const char& max_level) {