Serialize and deserialize next sequence id properly instead of string encoding.

This commit is contained in:
Kishore Nallan 2018-02-02 19:02:19 -05:00
parent b73dbebd2d
commit 0f7530ed13
8 changed files with 29 additions and 23 deletions

View File

@ -90,8 +90,8 @@
- ~~Use rocksdb batch put for atomic insertion~~
- ~~Proper logging~~
- ~~Handle store-get() not finding a key~~
- ~~Deprecate converting integer to string verbatim~~
- Deprecate union type punning
- Deprecate converting integer to string verbatim
- NOT operator support
- > INT32_MAX validation for float field
- highlight of string arrays?

View File

@ -78,8 +78,6 @@ public:
void increment_next_seq_id_field();
static uint32_t deserialize_seq_id_key(std::string serialized_seq_id);
Option<uint32_t> doc_id_to_seq_id(std::string doc_id);
std::vector<std::string> get_facet_fields();

View File

@ -161,7 +161,7 @@ public:
// last 4 bytes of the key would be the serialized version of the sequence id
std::string serialized_seq_id = replication_event->key.substr(replication_event->key.length() - 4);
uint32_t seq_id = Collection::deserialize_seq_id_key(serialized_seq_id);
uint32_t seq_id = StringUtils::deserialize_uint32_t(serialized_seq_id);
collection->index_in_memory(document, seq_id);
}

View File

@ -11,6 +11,7 @@
#include <rocksdb/options.h>
#include <rocksdb/merge_operator.h>
#include <rocksdb/transaction_log.h>
#include "string_utils.h"
#include "logger.h"
class UInt64AddOperator : public rocksdb::AssociativeMergeOperator {
@ -19,9 +20,9 @@ public:
std::string* new_value, rocksdb::Logger* logger) const override {
uint64_t existing = 0;
if (existing_value) {
existing = (uint64_t) std::stoi(existing_value->ToString());
existing = StringUtils::deserialize_uint32_t(existing_value->ToString());
}
*new_value = std::to_string(existing + std::stoi(value.ToString()));
*new_value = StringUtils::serialize_uint32_t(existing + StringUtils::deserialize_uint32_t(value.ToString()));
return true;
}
@ -134,7 +135,7 @@ public:
}
void increment(const std::string & key, uint32_t value) {
db->Merge(rocksdb::WriteOptions(), key, std::to_string(value));
db->Merge(rocksdb::WriteOptions(), key, StringUtils::serialize_uint32_t(value));
}
uint64_t get_latest_seq_number() const {

View File

@ -177,4 +177,20 @@ struct StringUtils {
return out;
}
// Encodes `num` as a fixed-width, 4-byte big-endian string (most significant
// byte first). A fixed-width big-endian layout keeps the byte-wise ordering of
// the encoded strings identical to the numeric ordering of the values, which a
// decimal std::to_string() rendering would not.
static std::string serialize_uint32_t(uint32_t num) {
    std::string encoded(4, '\0');
    for (int idx = 0; idx < 4; ++idx) {
        // idx 0 takes bits 31..24, idx 3 takes bits 7..0.
        const int shift = 24 - 8 * idx;
        const unsigned char octet = (unsigned char) ((num >> shift) & 0xFF);
        encoded[idx] = (char) octet;
    }
    return encoded;
}
// Decodes a 4-byte big-endian buffer (the layout written by
// serialize_uint32_t: most significant byte first) back into a uint32_t.
//
// Only the first four bytes of `serialized_num` are read; any trailing bytes
// are ignored. Precondition: `serialized_num` holds at least 4 bytes. Shorter
// input is not validated here and indexes past the end of the string.
static uint32_t deserialize_uint32_t(std::string serialized_num) {
    // Widen every byte to uint32_t *before* shifting. Masking a char with 0xFF
    // yields an int, and shifting an int holding 0x80..0xFF left by 24 does not
    // fit in a signed int (undefined in C++11, implementation-defined in
    // C++14/17). Doing the arithmetic in uint32_t keeps it well defined.
    const uint32_t byte0 = static_cast<unsigned char>(serialized_num[0]);
    const uint32_t byte1 = static_cast<unsigned char>(serialized_num[1]);
    const uint32_t byte2 = static_cast<unsigned char>(serialized_num[2]);
    const uint32_t byte3 = static_cast<unsigned char>(serialized_num[3]);

    return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
};

View File

@ -731,19 +731,8 @@ std::string Collection::get_next_seq_id_key(const std::string & collection_name)
std::string Collection::get_seq_id_key(uint32_t seq_id) {
// We can't simply do std::to_string() because we want to preserve the byte order.
// & 0xFF masks all but the lowest eight bits.
unsigned char bytes[4];
bytes[0] = (unsigned char) ((seq_id >> 24) & 0xFF);
bytes[1] = (unsigned char) ((seq_id >> 16) & 0xFF);
bytes[2] = (unsigned char) ((seq_id >> 8) & 0xFF);
bytes[3] = (unsigned char) ((seq_id & 0xFF));
return get_seq_id_collection_prefix() + "_" + std::string(bytes, bytes+4);
}
uint32_t Collection::deserialize_seq_id_key(std::string serialized_seq_id) {
uint32_t seq_id = ((serialized_seq_id[0] & 0xFF) << 24) | ((serialized_seq_id[1] & 0xFF) << 16) |
((serialized_seq_id[2] & 0xFF) << 8) | (serialized_seq_id[3] & 0xFF);
return seq_id;
const std::string & serialized_id = StringUtils::serialize_uint32_t(seq_id);
return get_seq_id_collection_prefix() + "_" + serialized_id;
}
std::string Collection::get_doc_id_key(const std::string & doc_id) {

View File

@ -83,7 +83,7 @@ Option<bool> CollectionManager::init(Store *store, const std::string & auth_key,
}
uint32_t collection_next_seq_id = next_seq_id_status == StoreStatus::NOT_FOUND ? 0 :
(const uint32_t) std::stoi(collection_next_seq_id_str);
StringUtils::deserialize_uint32_t(collection_next_seq_id_str);
Collection* collection = init_collection(collection_meta, collection_next_seq_id);
@ -167,7 +167,7 @@ Option<Collection*> CollectionManager::create_collection(std::string name, const
next_collection_id++;
rocksdb::WriteBatch batch;
batch.Put(Collection::get_next_seq_id_key(name), std::to_string(0));
batch.Put(Collection::get_next_seq_id_key(name), StringUtils::serialize_uint32_t(0));
batch.Put(Collection::get_meta_key(name), collection_meta.dump());
batch.Put(NEXT_COLLECTION_ID_KEY, std::to_string(next_collection_id));
bool write_ok = store->batch_write(batch);

View File

@ -3,6 +3,7 @@
#include <vector>
#include <fstream>
#include <collection_manager.h>
#include "string_utils.h"
#include "collection.h"
class CollectionManagerTest : public ::testing::Test {
@ -77,7 +78,8 @@ TEST_F(CollectionManagerTest, CollectionCreation) {
store->get(CollectionManager::NEXT_COLLECTION_ID_KEY, next_collection_id);
ASSERT_EQ(3, num_keys);
ASSERT_EQ("1", next_seq_id); // we already call `collection1->get_next_seq_id` above, which is side-effecting
// we already call `collection1->get_next_seq_id` above, which is side-effecting
ASSERT_EQ(1, StringUtils::deserialize_uint32_t(next_seq_id));
ASSERT_EQ("{\"fields\":[{\"facet\":false,\"name\":\"title\",\"type\":\"string\"},"
"{\"facet\":false,\"name\":\"starring\",\"type\":\"string\"},"
"{\"facet\":true,\"name\":\"cast\",\"type\":\"string[]\"},"