Fix ordering of sequence ID RocksDB keys by encoding the sequence ID as big-endian bytes.

This commit is contained in:
Kishore Nallan 2017-02-04 21:32:18 +05:30
parent b880cfd531
commit 3ef10b5bb0
5 changed files with 23 additions and 32 deletions

View File

@ -11,12 +11,6 @@
class Collection {
private:
// Using a $ prefix so that these meta keys stay above record entries in a lexicographically ordered KV store
static constexpr const char* COLLECTION_META_PREFIX = "$CM";
static constexpr const char* DOC_ID_PREFIX = "$DI";
static constexpr const char* COLLECTION_NEXT_SEQ_PREFIX = "$CS";
static constexpr const char* SEQ_ID_PREFIX = "$SI";
std::string name;
uint32_t collection_id;
@ -79,7 +73,7 @@ public:
static std::string get_meta_key(std::string collection_name);
std::string get_seq_id_prefix();
std::string get_seq_id_collection_prefix();
uint32_t get_collection_id();
@ -102,9 +96,15 @@ public:
void score_results(Topster<100> &topster, const int & token_rank, const std::vector<art_leaf *> &query_suggestion,
const uint32_t *result_ids, const size_t result_size) const;
void index_in_memory(const nlohmann::json &document, uint32_t seq_id);
enum {MAX_SEARCH_TOKENS = 20};
enum {MAX_RESULTS = 100};
void index_in_memory(const nlohmann::json &document, uint32_t seq_id);
// Using a $ prefix so that these meta keys stay above record entries in a lexicographically ordered KV store
static constexpr const char* COLLECTION_META_PREFIX = "$CM";
static constexpr const char* COLLECTION_NEXT_SEQ_PREFIX = "$CS";
static constexpr const char* SEQ_ID_PREFIX = "$SI";
static constexpr const char* DOC_ID_PREFIX = "$DI";
};

View File

@ -18,8 +18,6 @@ private:
// Using an ID instead of a collection's name makes renaming possible
uint32_t next_collection_id;
static constexpr const char* COLLECTION_META_PREFIX = "$CM";
static constexpr const char* COLLECTION_NAME_KEY = "name";
static constexpr const char* COLLECTION_ID_KEY = "id";
static constexpr const char* COLLECTION_SEARCH_FIELDS_KEY = "search_fields";
@ -27,8 +25,6 @@ private:
CollectionManager();
static std::string get_collection_meta_key(std::string collection_name);
public:
static CollectionManager& get_instance() {
static CollectionManager instance;

View File

@ -606,18 +606,18 @@ void Collection::remove(std::string id) {
}
std::string Collection::get_next_seq_id_key(std::string collection_name) {
return COLLECTION_NEXT_SEQ_PREFIX + collection_name + "_SEQ";
return std::string(COLLECTION_NEXT_SEQ_PREFIX) + "_" + collection_name;
}
std::string Collection::get_seq_id_key(uint32_t seq_id) {
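// We can't simply use std::to_string(): decimal strings do not sort in numeric order (e.g. "10" < "9"),
// so the sequence ID is encoded as fixed-width bytes, most significant byte first, to keep keys ordered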
union byteuint32_t {
char bytes[4];
uint32_t i;
};
byteuint32_t buint;
buint.i = seq_id;
return std::to_string(collection_id) + "_" + SEQ_ID_PREFIX + std::string(buint.bytes);
unsigned char bytes[4];
bytes[0] = (unsigned char) ((seq_id >> 24) & 0xFF);
bytes[1] = (unsigned char) ((seq_id >> 16) & 0xFF);
bytes[2] = (unsigned char) ((seq_id >> 8) & 0xFF);
bytes[3] = (unsigned char) ((seq_id & 0xFF));
return get_seq_id_collection_prefix() + "_" + std::string(bytes, bytes+4);
}
std::string Collection::get_doc_id_key(std::string doc_id) {
@ -647,6 +647,6 @@ std::string Collection::get_meta_key(std::string collection_name) {
return COLLECTION_META_PREFIX + collection_name;
}
std::string Collection::get_seq_id_prefix() {
return std::to_string(collection_id) + "_" + SEQ_ID_PREFIX;
std::string Collection::get_seq_id_collection_prefix() {
return std::to_string(collection_id) + "_" + std::string(SEQ_ID_PREFIX);
}

View File

@ -21,7 +21,7 @@ void CollectionManager::init(Store *store) {
}
std::vector<std::string> collection_meta_jsons;
store->scan_fill(COLLECTION_META_PREFIX, collection_meta_jsons);
store->scan_fill(Collection::COLLECTION_META_PREFIX, collection_meta_jsons);
for(auto collection_meta_json: collection_meta_jsons) {
nlohmann::json collection_meta = nlohmann::json::parse(collection_meta_json);
@ -50,8 +50,8 @@ void CollectionManager::init(Store *store) {
// Fetch records from the store and re-create memory index
std::vector<std::string> documents;
const std::string seq_id_prefix = collection->get_seq_id_prefix();
rocksdb::Iterator* iter = store->scan(collection->get_seq_id_prefix());
const std::string seq_id_prefix = collection->get_seq_id_collection_prefix();
rocksdb::Iterator* iter = store->scan(seq_id_prefix);
while(iter->Valid() && iter->key().starts_with(seq_id_prefix)) {
const std::string doc_json_str = iter->value().ToString();
@ -101,10 +101,6 @@ Collection* CollectionManager::create_collection(std::string name, const std::ve
return new_collection;
}
std::string CollectionManager::get_collection_meta_key(std::string collection_name) {
return COLLECTION_META_PREFIX + collection_name;
}
Collection* CollectionManager::get_collection(std::string collection_name) {
if(collections.count(Collection::get_meta_key(collection_name)) != 0) {
return collections.at(Collection::get_meta_key(collection_name));

View File

@ -220,11 +220,10 @@ int main(int argc, char **argv) {
collection = collectionManager.get_collection("collection");
if(collection == nullptr) {
collection = collectionManager.create_collection("collection", search_fields, rank_fields);
//index_documents(std::string(ROOT_DIR)+"test/documents.jsonl");
index_documents(argv[1]);
}
//index_documents(std::string(ROOT_DIR)+"test/documents.jsonl");
index_documents(argv[1]);
h2o_config_init(&config);
h2o_hostconf_t *hostconf = h2o_config_register_host(&config, h2o_iovec_init(H2O_STRLIT("default")), 65535);
register_handler(hostconf, "/add", post_add_document);