mirror of
https://github.com/typesense/typesense.git
synced 2025-05-19 21:22:25 +08:00
Separate geo index for every field + proper deletion.
This commit is contained in:
parent
a247a79a80
commit
56bbf8df26
@ -172,7 +172,7 @@ private:
|
||||
|
||||
spp::sparse_hash_map<std::string, num_tree_t*> numerical_index;
|
||||
|
||||
spp::sparse_hash_map<std::string, std::vector<uint32_t>> geopoint_index;
|
||||
spp::sparse_hash_map<std::string, spp::sparse_hash_map<std::string, std::vector<uint32_t>>*> geopoint_index;
|
||||
|
||||
// facet_field => (seq_id => values)
|
||||
spp::sparse_hash_map<std::string, spp::sparse_hash_map<uint32_t, facet_hash_values_t>*> facet_index_v3;
|
||||
|
@ -30,6 +30,9 @@ Index::Index(const std::string name, const std::unordered_map<std::string, field
|
||||
art_tree_init(t);
|
||||
search_index.emplace(fname_field.first, t);
|
||||
}
|
||||
} else if(fname_field.second.is_geopoint()) {
|
||||
auto field_geo_index = new spp::sparse_hash_map<std::string, std::vector<uint32_t>>();
|
||||
geopoint_index.emplace(fname_field.first, field_geo_index);
|
||||
} else {
|
||||
num_tree_t* num_tree = new num_tree_t;
|
||||
numerical_index.emplace(fname_field.first, num_tree);
|
||||
@ -65,12 +68,21 @@ Index::~Index() {
|
||||
name_tree.second = nullptr;
|
||||
}
|
||||
|
||||
search_index.clear();
|
||||
|
||||
for(auto & name_index: geopoint_index) {
|
||||
delete name_index.second;
|
||||
name_index.second = nullptr;
|
||||
}
|
||||
|
||||
geopoint_index.clear();
|
||||
|
||||
for(auto & name_tree: numerical_index) {
|
||||
delete name_tree.second;
|
||||
name_tree.second = nullptr;
|
||||
}
|
||||
|
||||
search_index.clear();
|
||||
numerical_index.clear();
|
||||
|
||||
for(auto & name_map: sort_index) {
|
||||
delete name_map.second;
|
||||
@ -218,7 +230,8 @@ Option<uint32_t> Index::index_in_memory(const nlohmann::json &document, uint32_t
|
||||
const std::vector<double>& latlong = document[field_name];
|
||||
S2Point point = S2LatLng::FromDegrees(latlong[0], latlong[1]).ToPoint();
|
||||
for(const auto& term: indexer.GetIndexTerms(point, "")) {
|
||||
geopoint_index[term].push_back(seq_id);
|
||||
auto geo_index = geopoint_index.at(field_name);
|
||||
(*geo_index)[term].push_back(seq_id);
|
||||
}
|
||||
} else if(field_pair.second.type == field_types::STRING_ARRAY) {
|
||||
art_tree *t = search_index.at(field_name);
|
||||
@ -1006,7 +1019,8 @@ uint32_t Index::do_filtering(uint32_t** filter_ids_out, const std::vector<filter
|
||||
for(size_t i = 0; i < filters.size(); i++) {
|
||||
const filter & a_filter = filters[i];
|
||||
bool has_search_index = search_index.count(a_filter.field_name) != 0 ||
|
||||
numerical_index.count(a_filter.field_name) != 0;
|
||||
numerical_index.count(a_filter.field_name) != 0 ||
|
||||
geopoint_index.count(a_filter.field_name) != 0;
|
||||
|
||||
if(!has_search_index) {
|
||||
continue;
|
||||
@ -1132,8 +1146,9 @@ uint32_t Index::do_filtering(uint32_t** filter_ids_out, const std::vector<filter
|
||||
S2RegionTermIndexer indexer(options);
|
||||
|
||||
for (const auto& term : indexer.GetQueryTerms(*query_region, "")) {
|
||||
const auto& ids_it = geopoint_index.find(term);
|
||||
if(ids_it != geopoint_index.end()) {
|
||||
auto geo_index = geopoint_index.at(a_filter.field_name);
|
||||
const auto& ids_it = geo_index->find(term);
|
||||
if(ids_it != geo_index->end()) {
|
||||
geo_result_ids.insert(ids_it->second.begin(), ids_it->second.end());
|
||||
}
|
||||
}
|
||||
@ -2474,6 +2489,21 @@ Option<uint32_t> Index::remove(const uint32_t seq_id, const nlohmann::json & doc
|
||||
int64_t bool_int64 = value ? 1 : 0;
|
||||
num_tree->remove(bool_int64, seq_id);
|
||||
}
|
||||
} else if(search_field.is_geopoint()) {
|
||||
auto geo_index = geopoint_index[field_name];
|
||||
S2RegionTermIndexer::Options options;
|
||||
options.set_index_contains_points_only(true);
|
||||
S2RegionTermIndexer indexer(options);
|
||||
|
||||
const std::vector<double>& latlong = document[field_name];
|
||||
S2Point point = S2LatLng::FromDegrees(latlong[0], latlong[1]).ToPoint();
|
||||
for(const auto& term: indexer.GetIndexTerms(point, "")) {
|
||||
std::vector<uint32_t>& ids = (*geo_index)[term];
|
||||
ids.erase(std::remove(ids.begin(), ids.end(), seq_id), ids.end());
|
||||
if(ids.empty()) {
|
||||
geo_index->erase(term);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// remove facets
|
||||
@ -2540,6 +2570,9 @@ void Index::refresh_schemas(const std::vector<field>& new_fields) {
|
||||
art_tree *t = new art_tree;
|
||||
art_tree_init(t);
|
||||
search_index.emplace(new_field.name, t);
|
||||
} else if(new_field.is_geopoint()) {
|
||||
auto field_geo_index = new spp::sparse_hash_map<std::string, std::vector<uint32_t>>();
|
||||
geopoint_index.emplace(new_field.name, field_geo_index);
|
||||
} else {
|
||||
num_tree_t* num_tree = new num_tree_t;
|
||||
numerical_index.emplace(new_field.name, num_tree);
|
||||
|
@ -1033,6 +1033,44 @@ TEST_F(CollectionFilteringTest, GeoPointFiltering) {
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
TEST_F(CollectionFilteringTest, GeoPointRemoval) {
|
||||
std::vector<field> fields = {field("title", field_types::STRING, false),
|
||||
field("loc1", field_types::GEOPOINT, false),
|
||||
field("loc2", field_types::GEOPOINT, false),
|
||||
field("points", field_types::INT32, false),};
|
||||
|
||||
Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
|
||||
|
||||
nlohmann::json doc;
|
||||
doc["id"] = "0";
|
||||
doc["title"] = "Palais Garnier";
|
||||
doc["loc1"] = {48.872576479306765, 2.332291112241466};
|
||||
doc["loc2"] = {48.84620987789056, 2.345152755563131};
|
||||
doc["points"] = 100;
|
||||
|
||||
ASSERT_TRUE(coll1->add(doc.dump()).ok());
|
||||
|
||||
auto results = coll1->search("*",
|
||||
{}, "loc1: (48.87491151802846, 2.343945883701618, 1 km)",
|
||||
{}, {}, {0}, 10, 1, FREQUENCY).get();
|
||||
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
// remove the document, index another document and try querying again
|
||||
coll1->remove("0");
|
||||
doc["id"] = "1";
|
||||
|
||||
ASSERT_TRUE(coll1->add(doc.dump()).ok());
|
||||
|
||||
results = coll1->search("*",
|
||||
{}, "loc1: (48.87491151802846, 2.343945883701618, 1 km)",
|
||||
{}, {}, {0}, 10, 1, FREQUENCY).get();
|
||||
|
||||
ASSERT_EQ(1, results["found"].get<size_t>());
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
}
|
||||
|
||||
TEST_F(CollectionFilteringTest, GeoPolygonFiltering) {
|
||||
Collection *coll1;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user