Minor refactoring of collection manager.

This commit is contained in:
Kishore Nallan 2017-01-26 07:34:59 -06:00
parent 216ac7997a
commit 8475cba007
7 changed files with 100 additions and 53 deletions

View File

@ -20,7 +20,7 @@
- ~~Multi field search tests~~
- ~~storage key prefix should include collection name~~
- Index and search on multi-valued field
- Restore records as well on restart (like for meta)
- ~~Restore records as well on restart (like for meta)~~
- drop collection should remove all records from the store
- Pagination parameter
- UTF-8 support for fuzzy search

View File

@ -11,6 +11,12 @@
class Collection {
private:
// Using a $ prefix so that these meta keys stay above record entries in a lexicographically ordered KV store
static constexpr const char* COLLECTION_META_PREFIX = "$CM";
static constexpr const char* DOC_ID_PREFIX = "$DI";
static constexpr const char* COLLECTION_NEXT_SEQ_PREFIX = "$CS";
static constexpr const char* SEQ_ID_PREFIX = "$SI";
std::string name;
uint32_t collection_id;
@ -30,8 +36,9 @@ private:
spp::sparse_hash_map<uint32_t, int64_t> secondary_rank_scores;
std::string get_collection_next_seq_id_key(std::string collection_name);
uint32_t get_next_seq_id();
std::string get_doc_id_key(std::string doc_id);
std::string get_seq_id_key(uint32_t seq_id);
static inline std::vector<art_leaf *> next_suggestion(const std::vector<std::vector<art_leaf *>> &token_leaves,
long long int n);
@ -57,11 +64,21 @@ public:
~Collection();
static std::string get_next_seq_id_key(std::string collection_name);
static std::string get_meta_key(std::string collection_name);
std::string get_seq_id_prefix();
uint32_t get_collection_id();
std::string get_seq_id_key(uint32_t seq_id);
uint32_t get_next_seq_id();
std::string get_doc_id_key(std::string doc_id);
uint32_t doc_id_to_seq_id(std::string doc_id);
std::vector<std::string> get_rank_fields();
spp::sparse_hash_map<std::string, field> get_schema();
std::string add(std::string json_str);
@ -69,15 +86,10 @@ public:
const size_t num_results, const token_ordering token_order = FREQUENCY,
const bool prefix = false);
void remove(std::string id);
void score_results(Topster<100> &topster, const int & token_rank, const std::vector<art_leaf *> &query_suggestion,
const uint32_t *result_ids, const size_t result_size) const;
// Using a $ prefix so that these keys stay at the top of a lexicographically ordered KV store
const std::string SEQ_ID_PREFIX = "$SI";
const std::string DOC_ID_PREFIX = "$DI";
const std::string COLLECTION_NEXT_SEQ_PREFIX = "$CS";
enum {MAX_SEARCH_TOKENS = 20};
enum {MAX_RESULTS = 100};

View File

@ -18,19 +18,17 @@ private:
// Using an ID instead of a collection's name makes renaming possible
uint32_t next_collection_id;
const std::string NEXT_COLLECTION_ID_KEY = "$CI";
const std::string COLLECTION_NAME_PREFIX = "$CN";
const std::string COLLECTION_NEXT_SEQ_PREFIX = "$CS";
static constexpr const char* COLLECTION_META_PREFIX = "$CM";
static constexpr const char* NEXT_COLLECTION_ID_KEY = "$CI";
const std::string COLLECTION_NAME_KEY = "name";
const std::string COLLECTION_ID_KEY = "id";
const std::string COLLECTION_SEARCH_FIELDS_KEY = "search_fields";
const std::string COLLECTION_RANK_FIELDS_KEY = "rank_fields";
static constexpr const char* COLLECTION_NAME_KEY = "name";
static constexpr const char* COLLECTION_ID_KEY = "id";
static constexpr const char* COLLECTION_SEARCH_FIELDS_KEY = "search_fields";
static constexpr const char* COLLECTION_RANK_FIELDS_KEY = "rank_fields";
CollectionManager();
std::string get_collection_name_key(std::string collection_name);
std::string get_collection_next_seq_id_key(std::string collection_name);
static std::string get_collection_meta_key(std::string collection_name);
public:
static CollectionManager& get_instance() {

View File

@ -27,7 +27,7 @@ Collection::~Collection() {
}
uint32_t Collection::get_next_seq_id() {
store->increment(get_collection_next_seq_id_key(name), 1);
store->increment(get_next_seq_id_key(name), 1);
return next_seq_id++;
}
@ -550,7 +550,7 @@ void Collection::remove(std::string id) {
store->remove(get_seq_id_key(seq_id));
}
std::string Collection::get_collection_next_seq_id_key(std::string collection_name) {
std::string Collection::get_next_seq_id_key(std::string collection_name) {
return COLLECTION_NEXT_SEQ_PREFIX + collection_name + "_SEQ";
}
@ -572,3 +572,26 @@ std::string Collection::get_doc_id_key(std::string doc_id) {
uint32_t Collection::get_collection_id() {
return collection_id;
}
// Looks up the value stored under this document's doc-id key and parses it as the
// document's sequence ID.
//
// doc_id: the external document ID, turned into a store key via get_doc_id_key().
// Returns the parsed sequence ID.
// Throws std::invalid_argument if the fetched string does not start with a number,
// and std::out_of_range if it does not fit in an unsigned long.
// NOTE(review): the result of store->get() is not checked here; presumably a missing
// key leaves seq_id_str empty, which would surface as std::invalid_argument - confirm
// against Store::get.
uint32_t Collection::doc_id_to_seq_id(std::string doc_id) {
    std::string seq_id_str;
    store->get(get_doc_id_key(doc_id), seq_id_str);

    // Parse as unsigned: std::stoi targets a signed int and throws std::out_of_range
    // for IDs above INT_MAX even though they are valid uint32_t values.
    return static_cast<uint32_t>(std::stoul(seq_id_str));
}
std::vector<std::string> Collection::get_rank_fields() {
return rank_fields;
}
// Returns a copy of this collection's schema map (field name -> field).
spp::sparse_hash_map<std::string, field> Collection::get_schema() {
    return schema;
}
// Builds the store key for a collection's metadata: COLLECTION_META_PREFIX
// followed by the collection name.
std::string Collection::get_meta_key(std::string collection_name) {
    std::string meta_key(COLLECTION_META_PREFIX);
    meta_key.append(collection_name);
    return meta_key;
}
// Builds the key prefix "<collection_id>_" + SEQ_ID_PREFIX for this collection.
std::string Collection::get_seq_id_prefix() {
    std::string prefix = std::to_string(collection_id);
    prefix += "_";
    prefix += SEQ_ID_PREFIX;
    return prefix;
}

View File

@ -21,7 +21,7 @@ void CollectionManager::init(Store *store) {
}
std::vector<std::string> collection_meta_jsons;
store->scan_fill(COLLECTION_NAME_PREFIX, collection_meta_jsons);
store->scan_fill(COLLECTION_META_PREFIX, collection_meta_jsons);
for(auto collection_meta_json: collection_meta_jsons) {
nlohmann::json collection_meta = nlohmann::json::parse(collection_meta_json);
@ -35,7 +35,7 @@ void CollectionManager::init(Store *store) {
}
std::string collection_next_seq_id_str;
store->get(get_collection_next_seq_id_key(this_collection_name), collection_next_seq_id_str);
store->get(Collection::get_next_seq_id_key(this_collection_name), collection_next_seq_id_str);
uint32_t collection_next_seq_id = (const uint32_t) std::stoi(collection_next_seq_id_str);
std::vector<std::string> collection_rank_fields =
@ -50,30 +50,26 @@ void CollectionManager::init(Store *store) {
// Fetch records from the store and re-create memory index
std::vector<std::string> documents;
std::string seq_id_prefix = std::to_string(collection->get_collection_id()) + "_" + collection->SEQ_ID_PREFIX;
rocksdb::Iterator* iter = store->scan(seq_id_prefix);
const std::string seq_id_prefix = collection->get_seq_id_prefix();
rocksdb::Iterator* iter = store->scan(collection->get_seq_id_prefix());
while(iter->Valid() && iter->key().starts_with(seq_id_prefix)) {
const std::string doc_json_str = iter->value().ToString();
nlohmann::json document = nlohmann::json::parse(doc_json_str);
std::string seq_id_str;
store->get(collection->get_doc_id_key(document["id"]), seq_id_str);
uint32_t seq_id = (uint32_t) std::stoi(seq_id_str);
uint32_t seq_id = collection->doc_id_to_seq_id(document["id"]);
collection->index_in_memory(document, seq_id);
iter->Next();
}
delete iter;
collections.emplace(get_collection_name_key(this_collection_name), collection);
collections.emplace(Collection::get_meta_key(this_collection_name), collection);
}
}
Collection* CollectionManager::create_collection(std::string name, const std::vector<field> & search_fields,
const std::vector<std::string> & rank_fields) {
if(store->contains(get_collection_name_key(name))) {
if(store->contains(Collection::get_meta_key(name))) {
return nullptr;
}
@ -92,30 +88,26 @@ Collection* CollectionManager::create_collection(std::string name, const std::ve
collection_meta[COLLECTION_SEARCH_FIELDS_KEY] = search_fields_json;
collection_meta[COLLECTION_RANK_FIELDS_KEY] = rank_fields;
store->insert(get_collection_name_key(name), collection_meta.dump());
store->insert(get_collection_next_seq_id_key(name), std::to_string(0));
Collection* new_collection = new Collection(name, next_collection_id, 0, store, search_fields, rank_fields);
store->insert(Collection::get_meta_key(name), collection_meta.dump());
store->insert(Collection::get_next_seq_id_key(name), std::to_string(0));
next_collection_id++;
store->insert(NEXT_COLLECTION_ID_KEY, std::to_string(next_collection_id));
collections.emplace(get_collection_name_key(name), new_collection);
collections.emplace(Collection::get_meta_key(name), new_collection);
return new_collection;
}
std::string CollectionManager::get_collection_name_key(std::string collection_name) {
return COLLECTION_NAME_PREFIX + collection_name;
}
std::string CollectionManager::get_collection_next_seq_id_key(std::string collection_name) {
return COLLECTION_NEXT_SEQ_PREFIX + collection_name + "_SEQ";
// Builds the metadata key for a collection: COLLECTION_META_PREFIX followed by
// the collection name.
std::string CollectionManager::get_collection_meta_key(std::string collection_name) {
    std::string meta_key(COLLECTION_META_PREFIX);
    meta_key.append(collection_name);
    return meta_key;
}
Collection* CollectionManager::get_collection(std::string collection_name) {
if(collections.count(get_collection_name_key(collection_name)) != 0) {
return collections.at(get_collection_name_key(collection_name));
if(collections.count(Collection::get_meta_key(collection_name)) != 0) {
return collections.at(Collection::get_meta_key(collection_name));
}
return nullptr;
@ -123,11 +115,7 @@ Collection* CollectionManager::get_collection(std::string collection_name) {
// Releases the in-memory Collection objects owned by the manager.
//
// This deliberately does NOT go through drop_collection():
//  - drop_collection() removes the collection's entries from the store, whereas
//    tearing down the manager should only free memory;
//  - the keys of `collections` are meta keys (Collection::get_meta_key(name)), not
//    collection names, so passing kv.first to drop_collection() hands it the wrong
//    identifier;
//  - drop_collection() erases from `collections`, which must not happen while the
//    map is being iterated.
CollectionManager::~CollectionManager() {
    for(auto & kv: collections) {
        // Deleting a null pointer is a no-op, so no explicit check is required.
        delete kv.second;
        kv.second = nullptr;
    }

    // Drop every entry in one go once iteration has finished.
    collections.clear();
}
@ -137,11 +125,22 @@ bool CollectionManager::drop_collection(std::string collection_name) {
return false;
}
store->remove(Collection::get_meta_key(collection_name));
store->remove(Collection::get_next_seq_id_key(collection_name));
const std::string &collection_id_str = std::to_string(collection->get_collection_id());
rocksdb::Iterator* iter = store->scan(collection_id_str);
while(iter->Valid() && iter->key().starts_with(collection_id_str)) {
store->remove(iter->key().ToString());
iter->Next();
}
delete iter;
collections.erase(Collection::get_meta_key(collection_name));
delete collection;
collection = nullptr;
collections.erase(get_collection_name_key(collection_name));
// TODO: remove all records from the store
return true;
}

View File

@ -16,6 +16,10 @@ using namespace std;
int main(int argc, char* argv[]) {
const std::string state_dir_path = "/tmp/typesense-data";
std::vector<field> fields_to_index = {field("title", field_types::STRING)};
std::vector<std::string> rank_fields = {"points"};
Store *store = new Store("/tmp/typesense-data");
CollectionManager & collectionManager = CollectionManager::get_instance();
collectionManager.init(store);

View File

@ -38,6 +38,8 @@ TEST(CollectionManagerTest, RestoreRecordsOnRestart) {
nlohmann::json results = collection1->search("thomas", search_fields, 0, 10, FREQUENCY, false);
ASSERT_EQ(4, results["hits"].size());
spp::sparse_hash_map<std::string, field> schema = collection1->get_schema();
// create a new collection manager to ensure that it restores the records from the disk backed store
CollectionManager & collectionManager2 = CollectionManager::get_instance();
collectionManager2.init(store);
@ -45,6 +47,15 @@ TEST(CollectionManagerTest, RestoreRecordsOnRestart) {
collection1 = collectionManager2.get_collection("collection1");
ASSERT_NE(nullptr, collection1);
ASSERT_EQ(0, collection1->get_collection_id());
ASSERT_EQ(18, collection1->get_next_seq_id());
ASSERT_EQ(rank_fields, collection1->get_rank_fields());
ASSERT_EQ(schema.size(), collection1->get_schema().size());
results = collection1->search("thomas", search_fields, 0, 10, FREQUENCY, false);
ASSERT_EQ(4, results["hits"].size());
}
// Placeholder test case: the body is empty, so it currently asserts nothing.
// TODO: exercise CollectionManager::drop_collection and verify its effects.
TEST(CollectionManagerTest, DropCollectionCleanly) {
}