diff --git a/TODO.md b/TODO.md
index 08baa622..126266ac 100644
--- a/TODO.md
+++ b/TODO.md
@@ -20,7 +20,7 @@
 - ~~Multi field search tests~~
 - ~~storage key prefix should include collection name~~
 - Index and search on multi-valued field
-- Restore records as well on restart (like for meta)
+- ~~Restore records as well on restart (like for meta)~~
 - drop collection should remove all records from the store
 - Pagination parameter
 - UTF-8 support for fuzzy search
diff --git a/include/collection.h b/include/collection.h
index 20026d15..e6d2d3ba 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -11,6 +11,12 @@
 
 class Collection {
 private:
+    // Using a $ prefix so that these meta keys stay above record entries in a lexicographically ordered KV store
+    static constexpr const char* COLLECTION_META_PREFIX = "$CM";
+    static constexpr const char* DOC_ID_PREFIX = "$DI";
+    static constexpr const char* COLLECTION_NEXT_SEQ_PREFIX = "$CS";
+    static constexpr const char* SEQ_ID_PREFIX = "$SI";
+
     std::string name;
 
     uint32_t collection_id;
@@ -30,8 +36,9 @@ private:
 
     spp::sparse_hash_map secondary_rank_scores;
 
-    std::string get_collection_next_seq_id_key(std::string collection_name);
-    uint32_t get_next_seq_id();
+    std::string get_doc_id_key(std::string doc_id);
+
+    std::string get_seq_id_key(uint32_t seq_id);
 
     static inline std::vector next_suggestion(const std::vector> &token_leaves,
                                               long long int n);
@@ -57,11 +64,21 @@ public:
 
     ~Collection();
 
+    static std::string get_next_seq_id_key(std::string collection_name);
+
+    static std::string get_meta_key(std::string collection_name);
+
+    std::string get_seq_id_prefix();
+
     uint32_t get_collection_id();
 
-    std::string get_seq_id_key(uint32_t seq_id);
+    uint32_t get_next_seq_id();
 
-    std::string get_doc_id_key(std::string doc_id);
+    uint32_t doc_id_to_seq_id(std::string doc_id);
+
+    std::vector get_rank_fields();
+
+    spp::sparse_hash_map get_schema();
 
     std::string add(std::string json_str);
 
@@ -69,15 +86,10 @@ public:
                           const size_t num_results, const token_ordering token_order = FREQUENCY, const bool prefix = false);
 
     void remove(std::string id);
+
     void score_results(Topster<100> &topster, const int & token_rank, const std::vector &query_suggestion,
                        const uint32_t *result_ids, const size_t result_size) const;
 
-    // Using a $ prefix so that these keys stay at the top of a lexicographically ordered KV store
-    const std::string SEQ_ID_PREFIX = "$SI";
-    const std::string DOC_ID_PREFIX = "$DI";
-
-    const std::string COLLECTION_NEXT_SEQ_PREFIX = "$CS";
-
     enum {MAX_SEARCH_TOKENS = 20};
     enum {MAX_RESULTS = 100};
 
diff --git a/include/collection_manager.h b/include/collection_manager.h
index f12c0056..b8afc7d9 100644
--- a/include/collection_manager.h
+++ b/include/collection_manager.h
@@ -18,19 +18,17 @@ private:
     // Using a ID instead of a collection's name makes renaming possible
     uint32_t next_collection_id;
 
-    const std::string NEXT_COLLECTION_ID_KEY = "$CI";
-    const std::string COLLECTION_NAME_PREFIX = "$CN";
-    const std::string COLLECTION_NEXT_SEQ_PREFIX = "$CS";
+    static constexpr const char* COLLECTION_META_PREFIX = "$CM";
+    static constexpr const char* NEXT_COLLECTION_ID_KEY = "$CI";
 
-    const std::string COLLECTION_NAME_KEY = "name";
-    const std::string COLLECTION_ID_KEY = "id";
-    const std::string COLLECTION_SEARCH_FIELDS_KEY = "search_fields";
-    const std::string COLLECTION_RANK_FIELDS_KEY = "rank_fields";
+    static constexpr const char* COLLECTION_NAME_KEY = "name";
+    static constexpr const char* COLLECTION_ID_KEY = "id";
+    static constexpr const char* COLLECTION_SEARCH_FIELDS_KEY = "search_fields";
+    static constexpr const char* COLLECTION_RANK_FIELDS_KEY = "rank_fields";
 
     CollectionManager();
 
-    std::string get_collection_name_key(std::string collection_name);
-    std::string get_collection_next_seq_id_key(std::string collection_name);
+    static std::string get_collection_meta_key(std::string collection_name);
 
 public:
     static CollectionManager& get_instance() {
diff --git a/src/collection.cpp b/src/collection.cpp
index 2fd70b2d..2499fe46 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -27,7 +27,7 @@ Collection::~Collection() {
 }
 
 uint32_t Collection::get_next_seq_id() {
-    store->increment(get_collection_next_seq_id_key(name), 1);
+    store->increment(get_next_seq_id_key(name), 1);
     return next_seq_id++;
 }
 
@@ -550,7 +550,7 @@ void Collection::remove(std::string id) {
     store->remove(get_seq_id_key(seq_id));
 }
 
-std::string Collection::get_collection_next_seq_id_key(std::string collection_name) {
+std::string Collection::get_next_seq_id_key(std::string collection_name) {
     return COLLECTION_NEXT_SEQ_PREFIX + collection_name + "_SEQ";
 }
 
@@ -572,3 +572,30 @@ std::string Collection::get_doc_id_key(std::string doc_id) {
 uint32_t Collection::get_collection_id() {
     return collection_id;
 }
+
+// Looks up the sequence id that the store holds under the given document id.
+uint32_t Collection::doc_id_to_seq_id(std::string doc_id) {
+    std::string seq_id_str;
+    store->get(get_doc_id_key(doc_id), seq_id_str);
+    // stoul rather than stoi: sequence ids are uint32_t, and stoi throws std::out_of_range above INT_MAX
+    uint32_t seq_id = (uint32_t) std::stoul(seq_id_str);
+    return seq_id;
+}
+
+std::vector Collection::get_rank_fields() {
+    return rank_fields;
+}
+
+spp::sparse_hash_map Collection::get_schema() {
+    return schema;
+}
+
+// Store key under which a collection's meta JSON is written and looked up.
+std::string Collection::get_meta_key(std::string collection_name) {
+    return COLLECTION_META_PREFIX + collection_name;
+}
+
+// Key prefix used to scan this collection's stored documents: "<collection_id>_$SI".
+std::string Collection::get_seq_id_prefix() {
+    return std::to_string(collection_id) + "_" + SEQ_ID_PREFIX;
+}
\ No newline at end of file
diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp
index fc844f0e..61ff76f7 100644
--- a/src/collection_manager.cpp
+++ b/src/collection_manager.cpp
@@ -21,7 +21,7 @@ void CollectionManager::init(Store *store) {
     }
 
     std::vector collection_meta_jsons;
-    store->scan_fill(COLLECTION_NAME_PREFIX, collection_meta_jsons);
+    store->scan_fill(COLLECTION_META_PREFIX, collection_meta_jsons);
 
     for(auto collection_meta_json: collection_meta_jsons) {
         nlohmann::json collection_meta = nlohmann::json::parse(collection_meta_json);
@@ -35,7 +35,7 @@ void CollectionManager::init(Store *store) {
         }
 
         std::string collection_next_seq_id_str;
-        store->get(get_collection_next_seq_id_key(this_collection_name), collection_next_seq_id_str);
+        store->get(Collection::get_next_seq_id_key(this_collection_name), collection_next_seq_id_str);
         uint32_t collection_next_seq_id = (const uint32_t) std::stoi(collection_next_seq_id_str);
 
         std::vector collection_rank_fields =
@@ -50,30 +50,26 @@ void CollectionManager::init(Store *store) {
 
         // Fetch records from the store and re-create memory index
         std::vector documents;
-        std::string seq_id_prefix = std::to_string(collection->get_collection_id()) + "_" + collection->SEQ_ID_PREFIX;
+        const std::string seq_id_prefix = collection->get_seq_id_prefix();
         rocksdb::Iterator* iter = store->scan(seq_id_prefix);
 
         while(iter->Valid() && iter->key().starts_with(seq_id_prefix)) {
             const std::string doc_json_str = iter->value().ToString();
             nlohmann::json document = nlohmann::json::parse(doc_json_str);
-
-            std::string seq_id_str;
-            store->get(collection->get_doc_id_key(document["id"]), seq_id_str);
-            uint32_t seq_id = (uint32_t) std::stoi(seq_id_str);
-
+            uint32_t seq_id = collection->doc_id_to_seq_id(document["id"]);
             collection->index_in_memory(document, seq_id);
             iter->Next();
         }
 
         delete iter;
 
-        collections.emplace(get_collection_name_key(this_collection_name), collection);
+        collections.emplace(Collection::get_meta_key(this_collection_name), collection);
     }
 }
 
 Collection* CollectionManager::create_collection(std::string name, const std::vector & search_fields,
                                                  const std::vector & rank_fields) {
-    if(store->contains(get_collection_name_key(name))) {
+    if(store->contains(Collection::get_meta_key(name))) {
         return nullptr;
     }
 
@@ -92,30 +88,26 @@ Collection* CollectionManager::create_collection(std::string name, const std::ve
     collection_meta[COLLECTION_SEARCH_FIELDS_KEY] = search_fields_json;
     collection_meta[COLLECTION_RANK_FIELDS_KEY] = rank_fields;
 
-    store->insert(get_collection_name_key(name), collection_meta.dump());
-    store->insert(get_collection_next_seq_id_key(name), std::to_string(0));
-
     Collection* new_collection = new Collection(name, next_collection_id, 0, store, search_fields, rank_fields);
+    store->insert(Collection::get_meta_key(name), collection_meta.dump());
+    store->insert(Collection::get_next_seq_id_key(name), std::to_string(0));
+
     next_collection_id++;
 
     store->insert(NEXT_COLLECTION_ID_KEY, std::to_string(next_collection_id));
 
-    collections.emplace(get_collection_name_key(name), new_collection);
+    collections.emplace(Collection::get_meta_key(name), new_collection);
 
     return new_collection;
 }
 
-std::string CollectionManager::get_collection_name_key(std::string collection_name) {
-    return COLLECTION_NAME_PREFIX + collection_name;
-}
-
-std::string CollectionManager::get_collection_next_seq_id_key(std::string collection_name) {
-    return COLLECTION_NEXT_SEQ_PREFIX + collection_name + "_SEQ";
+std::string CollectionManager::get_collection_meta_key(std::string collection_name) {
+    return COLLECTION_META_PREFIX + collection_name;
 }
 
 Collection* CollectionManager::get_collection(std::string collection_name) {
-    if(collections.count(get_collection_name_key(collection_name)) != 0) {
-        return collections.at(get_collection_name_key(collection_name));
+    if(collections.count(Collection::get_meta_key(collection_name)) != 0) {
+        return collections.at(Collection::get_meta_key(collection_name));
     }
 
     return nullptr;
@@ -123,11 +115,9 @@ Collection* CollectionManager::get_collection(std::string collection_name) {
 
 CollectionManager::~CollectionManager() {
     for(auto kv: collections) {
-        if(kv.second != nullptr) {
-            delete kv.second;
-            kv.second = nullptr;
-            collections.erase(get_collection_name_key(kv.first));
-        }
+        // Release only the in-memory object: drop_collection() would also remove the persisted
+        // keys from the store and erase from `collections` while this loop is iterating over it.
+        delete kv.second;
     }
 }
 
@@ -137,11 +127,24 @@ bool CollectionManager::drop_collection(std::string collection_name) {
         return false;
     }
 
+    store->remove(Collection::get_meta_key(collection_name));
+    store->remove(Collection::get_next_seq_id_key(collection_name));
+
+    // Scan with the "<id>_" prefix: a bare "<id>" would also match other collections' keys
+    // (e.g. "1" matches "10_..."). NOTE(review): assumes doc-id keys also start with "<id>_" - confirm.
+    const std::string collection_id_prefix = std::to_string(collection->get_collection_id()) + "_";
+    rocksdb::Iterator* iter = store->scan(collection_id_prefix);
+    while(iter->Valid() && iter->key().starts_with(collection_id_prefix)) {
+        store->remove(iter->key().ToString());
+        iter->Next();
+    }
+
+    delete iter;
+
+    collections.erase(Collection::get_meta_key(collection_name));
+
     delete collection;
     collection = nullptr;
-    collections.erase(get_collection_name_key(collection_name));
-
-    // TODO: remove all records from the store
 
     return true;
 }
diff --git a/src/main/main.cpp b/src/main/main.cpp
index f16e0d1a..ed758d7a 100644
--- a/src/main/main.cpp
+++ b/src/main/main.cpp
@@ -16,6 +16,10 @@ using namespace std;
 
 int main(int argc, char* argv[]) {
     const std::string state_dir_path = "/tmp/typesense-data";
+    std::vector fields_to_index = {field("title", field_types::STRING)};
+    std::vector rank_fields = {"points"};
+    Store *store = new Store("/tmp/typesense-data");
+
     CollectionManager & collectionManager = CollectionManager::get_instance();
     collectionManager.init(store);
 
diff --git a/test/collection_manager_test.cpp b/test/collection_manager_test.cpp
index e6b3bdc8..2d12d9e1 100644
--- a/test/collection_manager_test.cpp
+++ b/test/collection_manager_test.cpp
@@ -38,6 +38,8 @@ TEST(CollectionManagerTest, RestoreRecordsOnRestart) {
     nlohmann::json results = collection1->search("thomas", search_fields, 0, 10, FREQUENCY, false);
     ASSERT_EQ(4, results["hits"].size());
 
+    spp::sparse_hash_map schema = collection1->get_schema();
+
     // create a new collection manager to ensure that it restores the records from the disk backed store
     CollectionManager & collectionManager2 = CollectionManager::get_instance();
     collectionManager2.init(store);
@@ -45,6 +47,15 @@ TEST(CollectionManagerTest, RestoreRecordsOnRestart) {
     collection1 = collectionManager2.get_collection("collection1");
     ASSERT_NE(nullptr, collection1);
 
+    ASSERT_EQ(0, collection1->get_collection_id());
+    ASSERT_EQ(18, collection1->get_next_seq_id());
+    ASSERT_EQ(rank_fields, collection1->get_rank_fields());
+    ASSERT_EQ(schema.size(), collection1->get_schema().size());
+
     results = collection1->search("thomas", search_fields, 0, 10, FREQUENCY, false);
     ASSERT_EQ(4, results["hits"].size());
+}
+
+TEST(CollectionManagerTest, DropCollectionCleanly) {
+
 }
\ No newline at end of file