mirror of
https://github.com/typesense/typesense.git
synced 2025-05-16 11:28:44 +08:00
Serialize and deserialize next sequence id properly instead of string encoding.
This commit is contained in:
parent
b73dbebd2d
commit
0f7530ed13
2
TODO.md
2
TODO.md
@ -90,8 +90,8 @@
|
||||
- ~~Use rocksdb batch put for atomic insertion~~
|
||||
- ~~Proper logging~~
|
||||
- ~~Handle store-get() not finding a key~~
|
||||
- ~~Deprecate converting integer to string verbatim~~
|
||||
- Deprecate union type punning
|
||||
- Deprecate converting integer to string verbatim
|
||||
- NOT operator support
|
||||
- > INT32_MAX validation for float field
|
||||
- highlight of string arrays?
|
||||
|
@ -78,8 +78,6 @@ public:
|
||||
|
||||
void increment_next_seq_id_field();
|
||||
|
||||
static uint32_t deserialize_seq_id_key(std::string serialized_seq_id);
|
||||
|
||||
Option<uint32_t> doc_id_to_seq_id(std::string doc_id);
|
||||
|
||||
std::vector<std::string> get_facet_fields();
|
||||
|
@ -161,7 +161,7 @@ public:
|
||||
|
||||
// last 4 bytes of the key would be the serialized version of the sequence id
|
||||
std::string serialized_seq_id = replication_event->key.substr(replication_event->key.length() - 4);
|
||||
uint32_t seq_id = Collection::deserialize_seq_id_key(serialized_seq_id);
|
||||
uint32_t seq_id = StringUtils::deserialize_uint32_t(serialized_seq_id);
|
||||
collection->index_in_memory(document, seq_id);
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <rocksdb/options.h>
|
||||
#include <rocksdb/merge_operator.h>
|
||||
#include <rocksdb/transaction_log.h>
|
||||
#include "string_utils.h"
|
||||
#include "logger.h"
|
||||
|
||||
class UInt64AddOperator : public rocksdb::AssociativeMergeOperator {
|
||||
@ -19,9 +20,9 @@ public:
|
||||
std::string* new_value, rocksdb::Logger* logger) const override {
|
||||
uint64_t existing = 0;
|
||||
if (existing_value) {
|
||||
existing = (uint64_t) std::stoi(existing_value->ToString());
|
||||
existing = StringUtils::deserialize_uint32_t(existing_value->ToString());
|
||||
}
|
||||
*new_value = std::to_string(existing + std::stoi(value.ToString()));
|
||||
*new_value = StringUtils::serialize_uint32_t(existing + StringUtils::deserialize_uint32_t(value.ToString()));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -134,7 +135,7 @@ public:
|
||||
}
|
||||
|
||||
void increment(const std::string & key, uint32_t value) {
|
||||
db->Merge(rocksdb::WriteOptions(), key, std::to_string(value));
|
||||
db->Merge(rocksdb::WriteOptions(), key, StringUtils::serialize_uint32_t(value));
|
||||
}
|
||||
|
||||
uint64_t get_latest_seq_number() const {
|
||||
|
@ -177,4 +177,20 @@ struct StringUtils {
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * Encodes a 32-bit unsigned integer as a fixed-width, 4-byte string.
 *
 * The most significant byte is written first (big-endian). With this layout a
 * byte-wise comparison of two encoded strings orders them the same way as the
 * numbers they encode, which a decimal std::to_string() rendering does not.
 *
 * @param num value to encode
 * @return a std::string of exactly 4 bytes; it may contain embedded '\0' bytes
 */
static std::string serialize_uint32_t(uint32_t num) {
    // Converting to unsigned char keeps only the low 8 bits of each shifted
    // value, so no explicit mask is required.
    const unsigned char bytes[4] = {
        static_cast<unsigned char>(num >> 24),
        static_cast<unsigned char>(num >> 16),
        static_cast<unsigned char>(num >> 8),
        static_cast<unsigned char>(num)
    };

    // Iterator-range constructor: all 4 bytes are copied, including zero bytes.
    return std::string(bytes, bytes + 4);
}
|
||||
|
||||
/**
 * Decodes a 32-bit unsigned integer from its 4-byte big-endian encoding
 * (most significant byte first).
 *
 * Only the first 4 bytes are read; any bytes after them are ignored.
 *
 * @param serialized_num string holding the encoded value in its first 4 bytes
 * @return the decoded value, or 0 when the input is shorter than 4 bytes
 */
static uint32_t deserialize_uint32_t(std::string serialized_num) {
    // Guard against truncated or foreign values (e.g. a short decimal string):
    // indexing past the end of a std::string is undefined behaviour.
    if (serialized_num.size() < 4) {
        return 0;
    }

    uint32_t num = 0;
    for (std::string::size_type i = 0; i < 4; ++i) {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended, and
        // keep the arithmetic in uint32_t so the top byte never lands in the
        // sign bit of a signed int.
        const uint32_t byte = static_cast<unsigned char>(serialized_num[i]);
        num = (num << 8) | byte;
    }

    return num;
}
|
||||
};
|
@ -731,19 +731,8 @@ std::string Collection::get_next_seq_id_key(const std::string & collection_name)
|
||||
std::string Collection::get_seq_id_key(uint32_t seq_id) {
|
||||
// We can't simply do std::to_string() because we want to preserve the byte order.
|
||||
// & 0xFF masks all but the lowest eight bits.
|
||||
unsigned char bytes[4];
|
||||
bytes[0] = (unsigned char) ((seq_id >> 24) & 0xFF);
|
||||
bytes[1] = (unsigned char) ((seq_id >> 16) & 0xFF);
|
||||
bytes[2] = (unsigned char) ((seq_id >> 8) & 0xFF);
|
||||
bytes[3] = (unsigned char) ((seq_id & 0xFF));
|
||||
|
||||
return get_seq_id_collection_prefix() + "_" + std::string(bytes, bytes+4);
|
||||
}
|
||||
|
||||
uint32_t Collection::deserialize_seq_id_key(std::string serialized_seq_id) {
|
||||
uint32_t seq_id = ((serialized_seq_id[0] & 0xFF) << 24) | ((serialized_seq_id[1] & 0xFF) << 16) |
|
||||
((serialized_seq_id[2] & 0xFF) << 8) | (serialized_seq_id[3] & 0xFF);
|
||||
return seq_id;
|
||||
const std::string & serialized_id = StringUtils::serialize_uint32_t(seq_id);
|
||||
return get_seq_id_collection_prefix() + "_" + serialized_id;
|
||||
}
|
||||
|
||||
std::string Collection::get_doc_id_key(const std::string & doc_id) {
|
||||
|
@ -83,7 +83,7 @@ Option<bool> CollectionManager::init(Store *store, const std::string & auth_key,
|
||||
}
|
||||
|
||||
uint32_t collection_next_seq_id = next_seq_id_status == StoreStatus::NOT_FOUND ? 0 :
|
||||
(const uint32_t) std::stoi(collection_next_seq_id_str);
|
||||
StringUtils::deserialize_uint32_t(collection_next_seq_id_str);
|
||||
|
||||
Collection* collection = init_collection(collection_meta, collection_next_seq_id);
|
||||
|
||||
@ -167,7 +167,7 @@ Option<Collection*> CollectionManager::create_collection(std::string name, const
|
||||
next_collection_id++;
|
||||
|
||||
rocksdb::WriteBatch batch;
|
||||
batch.Put(Collection::get_next_seq_id_key(name), std::to_string(0));
|
||||
batch.Put(Collection::get_next_seq_id_key(name), StringUtils::serialize_uint32_t(0));
|
||||
batch.Put(Collection::get_meta_key(name), collection_meta.dump());
|
||||
batch.Put(NEXT_COLLECTION_ID_KEY, std::to_string(next_collection_id));
|
||||
bool write_ok = store->batch_write(batch);
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include <collection_manager.h>
|
||||
#include "string_utils.h"
|
||||
#include "collection.h"
|
||||
|
||||
class CollectionManagerTest : public ::testing::Test {
|
||||
@ -77,7 +78,8 @@ TEST_F(CollectionManagerTest, CollectionCreation) {
|
||||
store->get(CollectionManager::NEXT_COLLECTION_ID_KEY, next_collection_id);
|
||||
|
||||
ASSERT_EQ(3, num_keys);
|
||||
ASSERT_EQ("1", next_seq_id); // we already call `collection1->get_next_seq_id` above, which is side-effecting
|
||||
// we already call `collection1->get_next_seq_id` above, which is side-effecting
|
||||
ASSERT_EQ(1, StringUtils::deserialize_uint32_t(next_seq_id));
|
||||
ASSERT_EQ("{\"fields\":[{\"facet\":false,\"name\":\"title\",\"type\":\"string\"},"
|
||||
"{\"facet\":false,\"name\":\"starring\",\"type\":\"string\"},"
|
||||
"{\"facet\":true,\"name\":\"cast\",\"type\":\"string[]\"},"
|
||||
|
Loading…
x
Reference in New Issue
Block a user