mirror of
https://github.com/typesense/typesense.git
synced 2025-05-19 21:22:25 +08:00
Minor refactoring of collection manager.
This commit is contained in:
parent
216ac7997a
commit
8475cba007
2
TODO.md
2
TODO.md
@ -20,7 +20,7 @@
|
||||
- ~~Multi field search tests~~
|
||||
- ~~storage key prefix should include collection name~~
|
||||
- Index and search on multi-valued field
|
||||
- Restore records as well on restart (like for meta)
|
||||
- ~~Restore records as well on restart (like for meta)~~
|
||||
- drop collection should remove all records from the store
|
||||
- Pagination parameter
|
||||
- UTF-8 support for fuzzy search
|
||||
|
@ -11,6 +11,12 @@
|
||||
|
||||
class Collection {
|
||||
private:
|
||||
// Using a $ prefix so that these meta keys stay above record entries in a lexicographically ordered KV store
|
||||
static constexpr const char* COLLECTION_META_PREFIX = "$CM";
|
||||
static constexpr const char* DOC_ID_PREFIX = "$DI";
|
||||
static constexpr const char* COLLECTION_NEXT_SEQ_PREFIX = "$CS";
|
||||
static constexpr const char* SEQ_ID_PREFIX = "$SI";
|
||||
|
||||
std::string name;
|
||||
|
||||
uint32_t collection_id;
|
||||
@ -30,8 +36,9 @@ private:
|
||||
|
||||
spp::sparse_hash_map<uint32_t, int64_t> secondary_rank_scores;
|
||||
|
||||
std::string get_collection_next_seq_id_key(std::string collection_name);
|
||||
uint32_t get_next_seq_id();
|
||||
std::string get_doc_id_key(std::string doc_id);
|
||||
|
||||
std::string get_seq_id_key(uint32_t seq_id);
|
||||
|
||||
static inline std::vector<art_leaf *> next_suggestion(const std::vector<std::vector<art_leaf *>> &token_leaves,
|
||||
long long int n);
|
||||
@ -57,11 +64,21 @@ public:
|
||||
|
||||
~Collection();
|
||||
|
||||
static std::string get_next_seq_id_key(std::string collection_name);
|
||||
|
||||
static std::string get_meta_key(std::string collection_name);
|
||||
|
||||
std::string get_seq_id_prefix();
|
||||
|
||||
uint32_t get_collection_id();
|
||||
|
||||
std::string get_seq_id_key(uint32_t seq_id);
|
||||
uint32_t get_next_seq_id();
|
||||
|
||||
std::string get_doc_id_key(std::string doc_id);
|
||||
uint32_t doc_id_to_seq_id(std::string doc_id);
|
||||
|
||||
std::vector<std::string> get_rank_fields();
|
||||
|
||||
spp::sparse_hash_map<std::string, field> get_schema();
|
||||
|
||||
std::string add(std::string json_str);
|
||||
|
||||
@ -69,15 +86,10 @@ public:
|
||||
const size_t num_results, const token_ordering token_order = FREQUENCY,
|
||||
const bool prefix = false);
|
||||
void remove(std::string id);
|
||||
|
||||
void score_results(Topster<100> &topster, const int & token_rank, const std::vector<art_leaf *> &query_suggestion,
|
||||
const uint32_t *result_ids, const size_t result_size) const;
|
||||
|
||||
// Using a $ prefix so that these keys stay at the top of a lexicographically ordered KV store
|
||||
const std::string SEQ_ID_PREFIX = "$SI";
|
||||
const std::string DOC_ID_PREFIX = "$DI";
|
||||
|
||||
const std::string COLLECTION_NEXT_SEQ_PREFIX = "$CS";
|
||||
|
||||
enum {MAX_SEARCH_TOKENS = 20};
|
||||
enum {MAX_RESULTS = 100};
|
||||
|
||||
|
@ -18,19 +18,17 @@ private:
|
||||
// Using an ID instead of a collection's name makes renaming possible
|
||||
uint32_t next_collection_id;
|
||||
|
||||
const std::string NEXT_COLLECTION_ID_KEY = "$CI";
|
||||
const std::string COLLECTION_NAME_PREFIX = "$CN";
|
||||
const std::string COLLECTION_NEXT_SEQ_PREFIX = "$CS";
|
||||
static constexpr const char* COLLECTION_META_PREFIX = "$CM";
|
||||
static constexpr const char* NEXT_COLLECTION_ID_KEY = "$CI";
|
||||
|
||||
const std::string COLLECTION_NAME_KEY = "name";
|
||||
const std::string COLLECTION_ID_KEY = "id";
|
||||
const std::string COLLECTION_SEARCH_FIELDS_KEY = "search_fields";
|
||||
const std::string COLLECTION_RANK_FIELDS_KEY = "rank_fields";
|
||||
static constexpr const char* COLLECTION_NAME_KEY = "name";
|
||||
static constexpr const char* COLLECTION_ID_KEY = "id";
|
||||
static constexpr const char* COLLECTION_SEARCH_FIELDS_KEY = "search_fields";
|
||||
static constexpr const char* COLLECTION_RANK_FIELDS_KEY = "rank_fields";
|
||||
|
||||
CollectionManager();
|
||||
|
||||
std::string get_collection_name_key(std::string collection_name);
|
||||
std::string get_collection_next_seq_id_key(std::string collection_name);
|
||||
static std::string get_collection_meta_key(std::string collection_name);
|
||||
|
||||
public:
|
||||
static CollectionManager& get_instance() {
|
||||
|
@ -27,7 +27,7 @@ Collection::~Collection() {
|
||||
}
|
||||
|
||||
uint32_t Collection::get_next_seq_id() {
|
||||
store->increment(get_collection_next_seq_id_key(name), 1);
|
||||
store->increment(get_next_seq_id_key(name), 1);
|
||||
return next_seq_id++;
|
||||
}
|
||||
|
||||
@ -550,7 +550,7 @@ void Collection::remove(std::string id) {
|
||||
store->remove(get_seq_id_key(seq_id));
|
||||
}
|
||||
|
||||
std::string Collection::get_collection_next_seq_id_key(std::string collection_name) {
|
||||
std::string Collection::get_next_seq_id_key(std::string collection_name) {
|
||||
return COLLECTION_NEXT_SEQ_PREFIX + collection_name + "_SEQ";
|
||||
}
|
||||
|
||||
@ -572,3 +572,26 @@ std::string Collection::get_doc_id_key(std::string doc_id) {
|
||||
uint32_t Collection::get_collection_id() {
|
||||
return collection_id;
|
||||
}
|
||||
|
||||
// Resolves a document ID to the sequence ID recorded for it in the store.
//
// doc_id: the document's ID; it is turned into a store key with get_doc_id_key().
// Returns: the stored value parsed as an unsigned 32-bit sequence ID.
// Throws:  std::invalid_argument if the fetched string is not numeric (including
//          when it is left empty -- NOTE(review): presumably the case when the key
//          is missing; confirm against Store::get), std::out_of_range if the value
//          does not fit in an unsigned long.
uint32_t Collection::doc_id_to_seq_id(std::string doc_id) {
    std::string seq_id_str;
    store->get(get_doc_id_key(doc_id), seq_id_str);

    // Parse as unsigned: sequence IDs are uint32_t, and std::stoi would throw
    // std::out_of_range for any ID above INT_MAX.
    return static_cast<uint32_t>(std::stoul(seq_id_str));
}
|
||||
|
||||
std::vector<std::string> Collection::get_rank_fields() {
|
||||
return rank_fields;
|
||||
}
|
||||
|
||||
// Accessor returning the collection's schema map by value (the caller gets its own copy).
spp::sparse_hash_map<std::string, field> Collection::get_schema() {
    return schema;
}
|
||||
|
||||
std::string Collection::get_meta_key(std::string collection_name) {
|
||||
return COLLECTION_META_PREFIX + collection_name;
|
||||
}
|
||||
|
||||
std::string Collection::get_seq_id_prefix() {
|
||||
return std::to_string(collection_id) + "_" + SEQ_ID_PREFIX;
|
||||
}
|
@ -21,7 +21,7 @@ void CollectionManager::init(Store *store) {
|
||||
}
|
||||
|
||||
std::vector<std::string> collection_meta_jsons;
|
||||
store->scan_fill(COLLECTION_NAME_PREFIX, collection_meta_jsons);
|
||||
store->scan_fill(COLLECTION_META_PREFIX, collection_meta_jsons);
|
||||
|
||||
for(auto collection_meta_json: collection_meta_jsons) {
|
||||
nlohmann::json collection_meta = nlohmann::json::parse(collection_meta_json);
|
||||
@ -35,7 +35,7 @@ void CollectionManager::init(Store *store) {
|
||||
}
|
||||
|
||||
std::string collection_next_seq_id_str;
|
||||
store->get(get_collection_next_seq_id_key(this_collection_name), collection_next_seq_id_str);
|
||||
store->get(Collection::get_next_seq_id_key(this_collection_name), collection_next_seq_id_str);
|
||||
|
||||
uint32_t collection_next_seq_id = (const uint32_t) std::stoi(collection_next_seq_id_str);
|
||||
std::vector<std::string> collection_rank_fields =
|
||||
@ -50,30 +50,26 @@ void CollectionManager::init(Store *store) {
|
||||
|
||||
// Fetch records from the store and re-create memory index
|
||||
std::vector<std::string> documents;
|
||||
std::string seq_id_prefix = std::to_string(collection->get_collection_id()) + "_" + collection->SEQ_ID_PREFIX;
|
||||
rocksdb::Iterator* iter = store->scan(seq_id_prefix);
|
||||
const std::string seq_id_prefix = collection->get_seq_id_prefix();
|
||||
rocksdb::Iterator* iter = store->scan(collection->get_seq_id_prefix());
|
||||
|
||||
while(iter->Valid() && iter->key().starts_with(seq_id_prefix)) {
|
||||
const std::string doc_json_str = iter->value().ToString();
|
||||
nlohmann::json document = nlohmann::json::parse(doc_json_str);
|
||||
|
||||
std::string seq_id_str;
|
||||
store->get(collection->get_doc_id_key(document["id"]), seq_id_str);
|
||||
uint32_t seq_id = (uint32_t) std::stoi(seq_id_str);
|
||||
|
||||
uint32_t seq_id = collection->doc_id_to_seq_id(document["id"]);
|
||||
collection->index_in_memory(document, seq_id);
|
||||
iter->Next();
|
||||
}
|
||||
|
||||
delete iter;
|
||||
|
||||
collections.emplace(get_collection_name_key(this_collection_name), collection);
|
||||
collections.emplace(Collection::get_meta_key(this_collection_name), collection);
|
||||
}
|
||||
}
|
||||
|
||||
Collection* CollectionManager::create_collection(std::string name, const std::vector<field> & search_fields,
|
||||
const std::vector<std::string> & rank_fields) {
|
||||
if(store->contains(get_collection_name_key(name))) {
|
||||
if(store->contains(Collection::get_meta_key(name))) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -92,30 +88,26 @@ Collection* CollectionManager::create_collection(std::string name, const std::ve
|
||||
collection_meta[COLLECTION_SEARCH_FIELDS_KEY] = search_fields_json;
|
||||
collection_meta[COLLECTION_RANK_FIELDS_KEY] = rank_fields;
|
||||
|
||||
store->insert(get_collection_name_key(name), collection_meta.dump());
|
||||
store->insert(get_collection_next_seq_id_key(name), std::to_string(0));
|
||||
|
||||
Collection* new_collection = new Collection(name, next_collection_id, 0, store, search_fields, rank_fields);
|
||||
|
||||
store->insert(Collection::get_meta_key(name), collection_meta.dump());
|
||||
store->insert(Collection::get_next_seq_id_key(name), std::to_string(0));
|
||||
|
||||
next_collection_id++;
|
||||
store->insert(NEXT_COLLECTION_ID_KEY, std::to_string(next_collection_id));
|
||||
|
||||
collections.emplace(get_collection_name_key(name), new_collection);
|
||||
collections.emplace(Collection::get_meta_key(name), new_collection);
|
||||
|
||||
return new_collection;
|
||||
}
|
||||
|
||||
std::string CollectionManager::get_collection_name_key(std::string collection_name) {
|
||||
return COLLECTION_NAME_PREFIX + collection_name;
|
||||
}
|
||||
|
||||
std::string CollectionManager::get_collection_next_seq_id_key(std::string collection_name) {
|
||||
return COLLECTION_NEXT_SEQ_PREFIX + collection_name + "_SEQ";
|
||||
std::string CollectionManager::get_collection_meta_key(std::string collection_name) {
|
||||
return COLLECTION_META_PREFIX + collection_name;
|
||||
}
|
||||
|
||||
Collection* CollectionManager::get_collection(std::string collection_name) {
|
||||
if(collections.count(get_collection_name_key(collection_name)) != 0) {
|
||||
return collections.at(get_collection_name_key(collection_name));
|
||||
if(collections.count(Collection::get_meta_key(collection_name)) != 0) {
|
||||
return collections.at(Collection::get_meta_key(collection_name));
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
@ -123,11 +115,7 @@ Collection* CollectionManager::get_collection(std::string collection_name) {
|
||||
|
||||
// Frees the in-memory Collection objects held by the manager.
//
// Only memory is released here. drop_collection() is intentionally NOT used:
// it also removes the collection's keys from the store, and tearing down the
// manager must leave persisted data intact so it can be restored by init().
// The map is cleared once, after the loop, instead of erasing entries while
// iterating over it.
CollectionManager::~CollectionManager() {
    for(auto & kv: collections) {
        // delete on a null pointer is a no-op, so no explicit null check is needed
        delete kv.second;
    }

    collections.clear();
}
|
||||
|
||||
@ -137,11 +125,22 @@ bool CollectionManager::drop_collection(std::string collection_name) {
|
||||
return false;
|
||||
}
|
||||
|
||||
store->remove(Collection::get_meta_key(collection_name));
|
||||
store->remove(Collection::get_next_seq_id_key(collection_name));
|
||||
|
||||
const std::string &collection_id_str = std::to_string(collection->get_collection_id());
|
||||
rocksdb::Iterator* iter = store->scan(collection_id_str);
|
||||
while(iter->Valid() && iter->key().starts_with(collection_id_str)) {
|
||||
store->remove(iter->key().ToString());
|
||||
iter->Next();
|
||||
}
|
||||
|
||||
delete iter;
|
||||
|
||||
collections.erase(Collection::get_meta_key(collection_name));
|
||||
|
||||
delete collection;
|
||||
collection = nullptr;
|
||||
|
||||
collections.erase(get_collection_name_key(collection_name));
|
||||
|
||||
// TODO: remove all records from the store
|
||||
return true;
|
||||
}
|
||||
|
@ -16,6 +16,10 @@ using namespace std;
|
||||
int main(int argc, char* argv[]) {
|
||||
const std::string state_dir_path = "/tmp/typesense-data";
|
||||
|
||||
std::vector<field> fields_to_index = {field("title", field_types::STRING)};
|
||||
std::vector<std::string> rank_fields = {"points"};
|
||||
Store *store = new Store("/tmp/typesense-data");
|
||||
|
||||
CollectionManager & collectionManager = CollectionManager::get_instance();
|
||||
collectionManager.init(store);
|
||||
|
||||
|
@ -38,6 +38,8 @@ TEST(CollectionManagerTest, RestoreRecordsOnRestart) {
|
||||
nlohmann::json results = collection1->search("thomas", search_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
spp::sparse_hash_map<std::string, field> schema = collection1->get_schema();
|
||||
|
||||
// create a new collection manager to ensure that it restores the records from the disk-backed store
|
||||
CollectionManager & collectionManager2 = CollectionManager::get_instance();
|
||||
collectionManager2.init(store);
|
||||
@ -45,6 +47,15 @@ TEST(CollectionManagerTest, RestoreRecordsOnRestart) {
|
||||
collection1 = collectionManager2.get_collection("collection1");
|
||||
ASSERT_NE(nullptr, collection1);
|
||||
|
||||
ASSERT_EQ(0, collection1->get_collection_id());
|
||||
ASSERT_EQ(18, collection1->get_next_seq_id());
|
||||
ASSERT_EQ(rank_fields, collection1->get_rank_fields());
|
||||
ASSERT_EQ(schema.size(), collection1->get_schema().size());
|
||||
|
||||
results = collection1->search("thomas", search_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
}
|
||||
|
||||
TEST(CollectionManagerTest, DropCollectionCleanly) {
|
||||
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user