Optimize geo filtering.

This commit is contained in:
Harpreet Sangar 2023-06-14 11:34:19 +05:30
parent 334ea25b21
commit 00061d61d5
3 changed files with 34 additions and 20 deletions

View File

@ -16,6 +16,8 @@ class NumericTrie {
void insert_geopoint_helper(const uint64_t& cell_id, const uint32_t& seq_id, char& level, const char& max_level);
void search_geopoints_helper(const uint64_t& cell_id, const char& max_index_level, std::set<Node*>& matches);
void search_range_helper(const int64_t& low,const int64_t& high, const char& max_level,
std::vector<Node*>& matches);
@ -41,7 +43,8 @@ class NumericTrie {
void insert_geopoint(const uint64_t& cell_id, const uint32_t& seq_id, const char& max_level);
void search_geopoint(const uint64_t& cell_id, const char& max_index_level, uint32_t*& ids, uint32_t& ids_length);
void search_geopoints(const std::vector<uint64_t>& cell_ids, const char& max_index_level,
std::vector<uint32_t>& geo_result_ids);
void delete_geopoint(const uint64_t& cell_id, uint32_t id, const char& max_level);
@ -120,7 +123,7 @@ public:
void insert_geopoint(const uint64_t& cell_id, const uint32_t& seq_id);
void search_geopoint(const uint64_t& cell_id, uint32_t*& ids, uint32_t& ids_length);
void search_geopoints(const std::vector<uint64_t>& cell_ids, std::vector<uint32_t>& geo_result_ids);
void delete_geopoint(const uint64_t& cell_id, uint32_t id);

View File

@ -894,23 +894,13 @@ void filter_result_iterator_t::init() {
S2RegionTermIndexer indexer(options);
auto const& geo_range_index = index->geo_range_index.at(a_filter.field_name);
std::vector<uint64_t> cell_ids;
for (const auto& term : indexer.GetQueryTerms(*query_region, "")) {
auto cell = S2CellId::FromToken(term);
uint32_t* geo_ids = nullptr;
uint32_t geo_ids_length = 0;
geo_range_index->search_geopoint(cell.id(), geo_ids, geo_ids_length);
geo_result_ids.reserve(geo_result_ids.size() + geo_ids_length);
for (uint32_t i = 0; i < geo_ids_length; i++) {
geo_result_ids.push_back(geo_ids[i]);
}
delete [] geo_ids;
cell_ids.push_back(cell.id());
}
gfx::timsort(geo_result_ids.begin(), geo_result_ids.end());
geo_result_ids.erase(std::unique( geo_result_ids.begin(), geo_result_ids.end() ), geo_result_ids.end());
geo_range_index->search_geopoints(cell_ids, geo_result_ids);
// Skip exact filtering step if query radius is greater than the threshold.
if (fi < a_filter.params.size() &&

View File

@ -1,4 +1,5 @@
#include <timsort.hpp>
#include <set>
#include "numeric_range_trie_test.h"
#include "array_utils.h"
@ -26,12 +27,12 @@ void NumericTrie::insert_geopoint(const uint64_t& cell_id, const uint32_t& seq_i
positive_trie->insert_geopoint(cell_id, seq_id, max_level);
}
void NumericTrie::search_geopoint(const uint64_t& cell_id, uint32_t*& ids, uint32_t& ids_length) {
void NumericTrie::search_geopoints(const std::vector<uint64_t>& cell_ids, std::vector<uint32_t>& geo_result_ids) {
if (positive_trie == nullptr) {
return;
}
positive_trie->search_geopoint(cell_id, max_level, ids, ids_length);
positive_trie->search_geopoints(cell_ids, max_level, geo_result_ids);
}
void NumericTrie::delete_geopoint(const uint64_t& cell_id, uint32_t id) {
@ -480,8 +481,8 @@ char get_max_search_level(const uint64_t& cell_id, const char& max_level) {
return i;
}
void NumericTrie::Node::search_geopoint(const uint64_t& cell_id, const char& max_index_level,
uint32_t*& ids, uint32_t& ids_length) {
void NumericTrie::Node::search_geopoints_helper(const uint64_t& cell_id, const char& max_index_level,
std::set<Node*>& matches) {
char level = 1;
Node* root = this;
auto index = get_geopoint_index(cell_id, level, max_index_level);
@ -496,7 +497,27 @@ void NumericTrie::Node::search_geopoint(const uint64_t& cell_id, const char& max
index = get_geopoint_index(cell_id, ++level, max_index_level);
}
root->get_all_ids(ids, ids_length);
matches.insert(root);
}
/// Collects the ids stored under every trie node matched by `cell_ids`.
///
/// Each cell id is resolved to a node via `search_geopoints_helper`; the nodes
/// are gathered in a `std::set` so that a node reached by several cell ids is
/// expanded only once. The ids of every matched node are appended to
/// `geo_result_ids`, which is then sorted and de-duplicated in place.
///
/// @param cell_ids         Cell ids to look up.
/// @param max_index_level  Forwarded unchanged to `search_geopoints_helper`.
/// @param geo_result_ids   In/out: matched ids are appended to any existing
///                         content; on return the whole vector is sorted and
///                         free of duplicates.
void NumericTrie::Node::search_geopoints(const std::vector<uint64_t>& cell_ids, const char& max_index_level,
                                         std::vector<uint32_t>& geo_result_ids) {
    std::set<Node*> matches;
    for (const auto& cell_id : cell_ids) {
        search_geopoints_helper(cell_id, max_index_level, matches);
    }

    for (const auto& match : matches) {
        // Read the length once instead of re-querying it on every iteration.
        const auto m_seq_ids_length = match->seq_ids.getLength();

        // uncompress() hands back a heap array that this function must release.
        auto* const m_seq_ids = match->seq_ids.uncompress();

        // A single range insert lets the vector grow once per node rather than
        // potentially on every push_back.
        geo_result_ids.insert(geo_result_ids.end(), m_seq_ids, m_seq_ids + m_seq_ids_length);

        delete [] m_seq_ids;
    }

    // Different nodes can hold the same id, so sort and drop the duplicates.
    gfx::timsort(geo_result_ids.begin(), geo_result_ids.end());
    geo_result_ids.erase(std::unique(geo_result_ids.begin(), geo_result_ids.end()), geo_result_ids.end());
}
void NumericTrie::Node::delete_geopoint(const uint64_t& cell_id, uint32_t id, const char& max_level) {