/* * PrefixTree.h * * This source file is part of the FoundationDB open source project * * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include "flow/flow.h" #include "flow/Arena.h" #include "fdbclient/FDBTypes.h" #include "fdbserver/Knobs.h" #include typedef uint64_t Word; static inline int commonPrefixLength(uint8_t const* ap, uint8_t const* bp, int cl) { int i = 0; const int wordEnd = cl - sizeof(Word) + 1; for(; i < wordEnd; i += sizeof(Word)) { Word a = *(Word *)ap; Word b = *(Word *)bp; if(a != b) { return i + ctzll(a ^ b) / 8; } ap += sizeof(Word); bp += sizeof(Word); } for (; i < cl; i++) { if (*ap != *bp) { return i; } ++ap; ++bp; } return cl; } static int commonPrefixLength(StringRef a, StringRef b) { return commonPrefixLength(a.begin(), b.begin(), std::min(a.size(), b.size())); } // This appears to be the fastest version static int lessOrEqualPowerOfTwo(int n) { int p; for (p = 1; p+p <= n; p+=p); return p; } /* static int _lessOrEqualPowerOfTwo(uint32_t n) { if(n == 0) return n; int trailing = __builtin_ctz(n); int leading = __builtin_clz(n); if(trailing + leading == ((sizeof(n) * 8) - 1)) return n; return 1 << ( (sizeof(n) * 8) - leading - 1); } static int __lessOrEqualPowerOfTwo(unsigned int n) { int p = 1; for(; p <= n; p <<= 1); return p >> 1; } */ static int perfectSubtreeSplitPoint(int subtree_size) { // return the inorder index of the root node in a subtree of the given size // consistent with the resulting binary search tree being "perfect" (having minimal height // and all missing nodes as far right as possible). // There has to be a simpler way to do this. int s = lessOrEqualPowerOfTwo((subtree_size - 1) / 2 + 1) - 1; return std::min(s * 2 + 1, subtree_size - s - 1); } static int perfectSubtreeSplitPointCached(int subtree_size) { static uint16_t *points = nullptr; static const int max = 500; if(points == nullptr) { points = new uint16_t[max]; for(int i = 0; i < max; ++i) points[i] = perfectSubtreeSplitPoint(i); } if(subtree_size < max) return points[subtree_size]; return perfectSubtreeSplitPoint(subtree_size); } struct PrefixTree { // TODO: Make PrefixTree use a more complex record type with a multi column key typedef KeyValueRef EntryRef; typedef Standalone Entry; static int MaximumTreeSize() { return std::numeric_limits::max(); }; struct Node { uint8_t flags; /* * Node fields * * Logically, a node has the following things * - Flags describing what is in the node * - Optional left child * - Optional right child * - Prefix string, described by a length and a source (which is the most recent left or right ancestor) * - Optional split string, which contains any bytes after prefix which are needed to make a branching decision * - Optional suffix string, containing any remaining key bytes after the split string * - Optional value string * * The physical layout places the left child subtree immediately after the split string so that it is likely * that the bytes read to make a branching decision and then choosing left (as should happen half of the time) * will have a high cache hit rate. * * If necessary, the flags byte could be an enumeration into a set of possible options, since not all options * combinations are needed. For example, * * - The tree is balanced and filled from the left at the last level, so a node cannot have only a right child. * - If there are no children, there is no point in splitting any key bytes after the prefix into separate strings. * - If there is exactly one child (left) then the key bytes after the prefix can all go in the split string. The * traversal decision is to either stop or go left and one of those options (stop) will still have good memory * locality. * * 8 valid/necessary option combinations for presense of (Left, Right, Split, Suffix) out of 16 possibilities * * L R Split Suffix * * N N N N # No children, key has no bytes after prefix * N N Y N # No children, key has bytes after prefix * Y N N N # One child, key has no bytes after prefix * Y N Y N # One child, key has bytes after prefix * Y Y N N # Two children, key has no bytes after prefix * Y Y N Y # Two children, branch decision can be made using only prefix bytes but there are more key bytes after * Y Y Y N # Two children, branch decision requires all key bytes after prefix * Y Y Y Y # Two children, branch decision requires some but not all bytes after prefix * * This can be represent with just 3 bits, if necessary, but for now there is space in the flags byte for all 4. * * Flag Bits * * prefix borrow from next * true - borrow from the closest ancestor greater than this node * false - borrow from the closest ancestor less than this node * large lengths = use 2 byte ints instead of 1 byte for prefix, split, suffix, and value lengths * (TODO: It might be better to just not use a suffix at all when large is lengths is set) * left child present * right child present * split string present * suffix string present * value string present * * Serialized format: * All lengths are in the header, which has variable size * * flags 1 byte * prefix length 1-2 bytes based on large lengths flag * split length 0-2 bytes based on split string present flag * suffix length 0-2 bytes based on suffix string present and large lengths flags * value length 0-1 bytes based on value string present and large lengths flag * left length 0 or 2 bytes depending on left child present * split 0+ bytes * left child 0+ bytes * suffix 0+ bytes * value 0+ bytes * right child 0+ bytes * */ enum EFlags { USE_LARGE_LENGTHS = 1 << 0, PREFIX_SOURCE_NEXT = 1 << 1, HAS_LEFT_CHILD = 1 << 2, HAS_RIGHT_CHILD = 1 << 3, HAS_SPLIT = 1 << 4, HAS_SUFFIX = 1 << 5, HAS_VALUE = 1 << 6 }; // Stores decoded offsets (from beginning) of Node components struct Parser { Parser() {} Parser(const Node *n) { init(n); } const Node *node; typedef uint16_t OffsetT; OffsetT headerLen; OffsetT prefixLen; OffsetT leftPos; OffsetT suffixPos; OffsetT valuePos; OffsetT rightPos; StringRef splitString() const { return StringRef((const uint8_t *)node + headerLen, leftPos); } StringRef suffixString() const { return StringRef((const uint8_t *)node + headerLen + suffixPos, valuePos - suffixPos); } StringRef valueString() const { return StringRef((const uint8_t *)node + headerLen + valuePos, rightPos - valuePos); } const Node *leftChild() const { if(node->flags & HAS_LEFT_CHILD) return (const Node *)((const uint8_t *)node + headerLen + leftPos); return nullptr; } const Node *rightChild() const { if(node->flags & HAS_RIGHT_CHILD) return (const Node *)((const uint8_t *)node + headerLen + rightPos); return nullptr; } int keyLen() const { int len = prefixLen + leftPos + (valuePos - suffixPos); ASSERT(len >= 0); return len; } void init(const Node *n) { node = n; union { const uint8_t *p8; const uint16_t *p16; }; p8 = (const uint8_t *)&n->flags + 1; int flags = n->flags; bool large = flags & USE_LARGE_LENGTHS; prefixLen = large ? *p16++ : *p8++; if(flags & HAS_SPLIT) leftPos = large ? *p16++ : *p8++; else leftPos = 0; suffixPos = leftPos; if(flags & HAS_LEFT_CHILD) suffixPos += *p16++; valuePos = suffixPos; if(flags & HAS_SUFFIX) valuePos += (large ? *p16++ : *p8++); rightPos = valuePos; if(flags & HAS_VALUE) rightPos += (large ? *p16++ : *p8++); int header = 2; // flags byte, first prefix len byte if(large) ++header; // second prefix len byte if(flags & HAS_SPLIT) header += large ? 2 : 1; if(flags & HAS_LEFT_CHILD) header += 2; if(flags & HAS_SUFFIX) header += large ? 2 : 1; if(flags & HAS_VALUE) header += large ? 2 : 1; headerLen = header; } }; static inline int getMaxOverhead(int index, int keySize, int valueSize) { bool large = keySize > 255 || valueSize > 255; int overhead = 1 + (large ? 2 : 1); // flags and prefix len // Value length size if present if(valueSize > 0) overhead += large ? 2 : 1; overhead += large ? 6 : 3; // Worst case scenario for value, split and suffix lengths if((index & 0x01) != 0) overhead += 2; // Left child length, one less than half of nodes will have one. return overhead; } public: // Methods for decoding specific Node members on-demand inline int getPrefixLen() const { return Parser(this).prefixLen; } inline StringRef getSplitString() const { return Parser(this).splitString(); } inline StringRef getSuffixString() const { return Parser(this).suffixString(); } inline StringRef getValueString() const { return Parser(this).valueString(); } inline const Node * getLeftChild() const { return Parser(this).leftChild(); } inline const Node * getRightChild() const { return Parser(this).rightChild(); } inline int getKeySize() const { return Parser(this).keyLen(); } }; #pragma pack(push,1) uint16_t size; // size in bytes Node root; #pragma pack(pop) static inline int GetHeaderSize() { return sizeof(PrefixTree) - sizeof(root); } private: struct PathEntry { const Node *node; Node::Parser parser; // Key may or may not point to the space within keyBuffer. // Key will always contain at least the prefix bytes borrowed by node // KeyBuffer will always be large enough to hold the entire reconstituted key for node // // These are mutable because getting key bytes from this PathEntry can change these // but they're really just a read cache for reconstituted key bytes. mutable StringRef key; mutable Standalone> keyBuffer; // Path entry was reached by going left from the previous node bool nodeIsLeftChild; // number of consecutive moves in same direction int moves; PathEntry() : node(nullptr) { } PathEntry(const PathEntry &rhs) { *this = rhs; } // Initialize the key byte buffer to hold bytes of a new node. Use a new arena // if the old arena is being held by any users. void initKeyBufferSpace() { if(node != nullptr) { int size = parser.keyLen(); if(keyBuffer.arena().impl && !keyBuffer.arena().impl->isSoleOwnerUnsafe()) { keyBuffer = Standalone>(); } keyBuffer.reserve(keyBuffer.arena(), size); } } PathEntry & operator= (const PathEntry &rhs) { node = rhs.node; parser = rhs.parser; nodeIsLeftChild = rhs.nodeIsLeftChild; moves = rhs.moves; // New key buffer must be able to hold full reconstituted key, not just the // part of it referenced by rhs.key (which may not be the whole thing) initKeyBufferSpace(); if(node != nullptr && rhs.key.size() > 0) { // Copy rhs.key into keyBuffer and set key to the destination bytes memcpy(keyBuffer.begin(), rhs.key.begin(), rhs.key.size()); key = StringRef(keyBuffer.begin(), rhs.key.size()); } else { key = rhs.key; } return *this; } void init(StringRef s) { node = nullptr; key = s; } void init(const Node *_node, const PathEntry *prefixSource, bool isLeft, int numMoves) { node = _node; parser.init(node); nodeIsLeftChild = isLeft; moves = numMoves; // keyBuffer will be large enough to hold the full reconstituted key but initially // key will be a reference returned from prefixSource->getKeyRef() // See comments near keyBuffer and key for more info. initKeyBufferSpace(); key = prefixSource->getKeyRef(parser.prefixLen); } inline bool valid() const { return node != nullptr; } int compareToKey(StringRef s) const { // Key has at least this node's borrowed prefix bytes in it. // If s is shorter than key, we only need to compare it to key if(s.size() < key.size()) return s.compare(key); int cmp = s.substr(0, key.size()).compare(key); if(cmp != 0) return cmp; // The borrowed prefix bytes and possibly more have already been compared and were equal int comparedLen = key.size(); s = s.substr(comparedLen); StringRef split = parser.splitString(); int splitSizeOriginal = split.size(); int splitStart = comparedLen - parser.prefixLen; if(splitStart < split.size()) { split = split.substr(splitStart); if(s.size() < split.size()) return s.compare(split); cmp = s.substr(0, split.size()).compare(split); if(cmp != 0) return cmp; s = s.substr(split.size()); comparedLen += split.size(); } int suffixStart = comparedLen - (parser.prefixLen + splitSizeOriginal); StringRef suffix = parser.suffixString(); ASSERT(suffixStart >= 0 && suffixStart <= suffix.size()); return s.compare(suffix.substr(suffixStart)); } // Make sure that key refers to bytes in keyBuffer, copying if necessary void ensureKeyInBuffer() const { if(key.begin() != keyBuffer.begin()) { memcpy(keyBuffer.begin(), key.begin(), key.size()); key = StringRef(keyBuffer.begin(), key.size()); } } // Get the borrowed prefix string. Key must contain all of those bytes but it could contain more. StringRef getPrefix() const { if(node == nullptr) return key; return key.substr(0, parser.prefixLen); } // Return a reference to the first size bytes of the key. // // If size <= key's size then a substring of key will be returned, but if alwaysUseKeyBuffer // is true then before returning the existing value of key (not just the first size bytes) // will be copied into keyBuffer and key will be updated to point there. // // If size is greater than key's size, then key will be moved into keyBuffer if it is not already there // and the remaining needed bytes will be copied into keyBuffer from the split and suffix strings. KeyRef getKeyRef(int size = -1, bool alwaysUseKeyBuffer = false) const { if(size < 0) size = parser.keyLen(); // If size is less than key then return a substring of it, possibly after moving it to the keyBuffer. if(size <= key.size()) { if(alwaysUseKeyBuffer) ensureKeyInBuffer(); return key.substr(0, size); } ASSERT(node != nullptr); ensureKeyInBuffer(); // The borrowed prefix bytes and possibly more must already be in key int writtenLen = key.size(); StringRef split = parser.splitString(); StringRef suffix = parser.suffixString(); int splitStart = writtenLen - parser.prefixLen; if(splitStart < split.size()) { int splitLen = std::min(split.size() - splitStart, size - writtenLen); memcpy(mutateString(key) + writtenLen, split.begin() + splitStart, splitLen); writtenLen += splitLen; } int suffixStart = writtenLen - parser.prefixLen - split.size(); if(suffixStart < suffix.size()) { int suffixLen = std::min(suffix.size() - suffixStart, size - writtenLen); memcpy(mutateString(key) + writtenLen, suffix.begin() + suffixStart, suffixLen); writtenLen += suffixLen; } ASSERT(writtenLen == size); key = StringRef(key.begin(), size); return key; } // Return keyRef(size) and the arena that keyBuffer resides in. Key getKey(int size = -1) const { StringRef k = getKeyRef(size, true); return Key(k, keyBuffer.arena()); } }; public: // Cursor provides a way to seek into a PrefixTree and iterate over its content // Seek and move methods can return false can return false if they fail to achieve the desired effect // but a cursor will remain 'valid' as long as the tree is not empty. // // It coalesces prefix bytes into a contiguous buffer for each node along the traversal // path to make iteration faster. struct Cursor { Cursor() : pathLen(0) { } Cursor(const Node *root, StringRef prevAncestor, StringRef nextAncestor) { init(root, prevAncestor, nextAncestor); } static const int initialPathLen = 3; static const int initialPathCapacity = 20; // This is a separate function so that Cursors can be reused to search different PrefixTrees // which avoids cursor destruction and creation which involves unnecessary memory churn. // The root node is arbitrarily assumed to be a right child of prevAncestor which itself is a left child of nextAncestor void init(const Node *root, StringRef prevAncestor, StringRef nextAncestor) { if(path.size() < initialPathCapacity) path.resize(initialPathCapacity); pathLen = initialPathLen; path[0].init(nextAncestor); path[1].init(prevAncestor); path[2].init(root, &path[root->flags & Node::PREFIX_SOURCE_NEXT ? 0 : 1], false, 1); } bool operator == (const Cursor &rhs) const { return pathBack().node == rhs.pathBack().node; } StringRef leftParentBoundary; StringRef rightParentBoundary; std::vector path; // pathLen is the number of elements in path which are in use. This is to prevent constantly destroying // and constructing PathEntry objects which would unnecessarily churn through memory in Arena for storing // coalesced prefixes. int pathLen; bool valid() const { return pathLen != 0 && pathBack().valid(); } // Get a reference to the current key which is valid until the Cursor is moved. KeyRef getKeyRef() const { return pathBack().getKeyRef(); } // Get a Standalone for the current key which will still be valid after the Cursor is moved. Key getKey() const { return pathBack().getKey(); } // Get a reference to the current value which is valid as long as the Cursor's page memory exists. ValueRef getValueRef() const { return pathBack().parser.valueString(); } // Get a key/value reference that is valid until the Cursor is moved. EntryRef getKVRef() const { return EntryRef(getKeyRef(), getValueRef()); } // Returns a standalone EntryRef where both key and value exist in the standalone's arena, // unless copyValue is false in which case the value will be a reference into tree memory. Entry getKV(bool copyValue = true) const { Key k = getKey(); ValueRef v = getValueRef(); if(copyValue) v = ValueRef(k.arena(), getValueRef()); return Entry(EntryRef(k, v), k.arena()); } // Moves the cursor to the node with the greatest key less than or equal to s. If successful, // returns true, otherwise returns false and the cursor will be at the node with the next key // greater than s. bool seekLessThanOrEqual(StringRef s) { if(pathLen == 0) return false; pathLen = initialPathLen; // TODO: Track position of difference and use prefix reuse bytes and prefix sources // to skip comparison of some prefix bytes when possible while(1) { const PathEntry &p = pathBack(); const Node *right = p.parser.rightChild(); _mm_prefetch((const char*)right, _MM_HINT_T0); int cmp = p.compareToKey(s); if(cmp == 0) return true; if(cmp < 0) { // Try to traverse left const Node *left = p.parser.leftChild(); if(left == nullptr) { // If we're at the root, cursor should now be before the first element if(pathLen == initialPathLen) { return false; } if(p.nodeIsLeftChild) { // If we only went left, cursor should now be before the first element if((p.moves + initialPathLen) == pathLen) { return false; } // Otherwise, go to the parent of the last right child traversed, // which is the last node from which we went right popPath(p.moves + 1); return true; } // p.directionLeft is false, so p.node is a right child, so go to its parent. popPath(1); return true; } int newMoves = p.nodeIsLeftChild ? p.moves + 1 : 1; const PathEntry *borrowSource = (left->flags & Node::PREFIX_SOURCE_NEXT) ? &p : &p - newMoves; pushPath(left, borrowSource, true, newMoves); } else { // Try to traverse right if(right == nullptr) { return true; } int newMoves = p.nodeIsLeftChild ? 1 : p.moves + 1; const PathEntry *borrowSource = (right->flags & Node::PREFIX_SOURCE_NEXT) ? &p - newMoves : &p; pushPath(right, borrowSource, false, newMoves); } } } inline const PathEntry &pathBack() const { return path[pathLen - 1]; } inline PathEntry &pathBack() { return path[pathLen - 1]; } inline void pushPath(const Node *node, const PathEntry *borrowSource, bool left, int moves) { ++pathLen; if(path.size() < pathLen) { path.resize(pathLen); } pathBack().init(node, borrowSource, left, moves); } inline void popPath(int n) { pathLen -= n; } std::string pathToString() const { std::string s; for(int i = 0; i < pathLen; ++i) { s += format("(%d: ", i); const Node *node = path[i].node; if(node != nullptr) { s += "childDir="; s += (path[i].nodeIsLeftChild ? "left " : "right "); } s += format("prefix='%s'", path[i].getPrefix().toHexString(20).c_str()); if(node != nullptr) { s += format(" split='%s' suffix='%s' value='%s'", node->getSplitString().toHexString(20).c_str(), node->getSuffixString().toHexString(20).c_str(), node->getValueString().toHexString(20).c_str()); } else s += ") "; } return s; } bool moveFirst() { if(pathLen == 0) return false; pathLen = initialPathLen; while(1) { const PathEntry &p = pathBack(); const Node *left = p.parser.leftChild(); if(left == nullptr) break; // TODO: This can be simpler since it only goes left int newMoves = p.nodeIsLeftChild ? p.moves + 1 : 1; const PathEntry *borrowSource = (left->flags & Node::PREFIX_SOURCE_NEXT) ? &p : &p - newMoves; pushPath(left, borrowSource, true, newMoves); } return true; } bool moveLast() { if(pathLen == 0) return false; pathLen = initialPathLen; while(1) { const PathEntry &p = pathBack(); const Node *right = p.parser.rightChild(); if(right == nullptr) break; // TODO: This can be simpler since it only goes right int newMoves = p.nodeIsLeftChild ? 1 : p.moves + 1; const PathEntry *borrowSource = (right->flags & Node::PREFIX_SOURCE_NEXT) ? &p - newMoves : &p; pushPath(right, borrowSource, false, newMoves); } return true; } bool moveNext() { const PathEntry &p = pathBack(); // If p isn't valid if(!p.valid()) { return false; } const Node *right = p.parser.rightChild(); // If we can't go right, then go upward to the parent of the last left child if(right == nullptr) { // If current node was a left child then pop one node and we're done if(p.nodeIsLeftChild) { popPath(1); return true; } // Current node is a right child. // If we are at the rightmost tree node return false and don't move. if(p.moves + initialPathLen - 1 == pathLen) { return false; } // Truncate path to the parent of the last left child popPath(p.moves + 1); return true; } // Go right int newMoves = p.nodeIsLeftChild ? 1 : p.moves + 1; const PathEntry *borrowSource = (right->flags & Node::PREFIX_SOURCE_NEXT) ? &p - newMoves : &p; pushPath(right, borrowSource, false, newMoves); // Go left as far as possible while(1) { const PathEntry &p = pathBack(); const Node *left = p.parser.leftChild(); if(left == nullptr) { return true; } int newMoves = p.nodeIsLeftChild ? p.moves + 1 : 1; const PathEntry *borrowSource = (left->flags & Node::PREFIX_SOURCE_NEXT) ? &p : &p - newMoves; pushPath(left, borrowSource, true, newMoves); } } bool movePrev() { const PathEntry &p = pathBack(); // If p isn't valid if(!p.valid()) { return false; } const Node *left = p.parser.leftChild(); // If we can't go left, then go upward to the parent of the last right child if(left == nullptr) { // If current node was a right child if(!p.nodeIsLeftChild) { // If we are at the root then don't move and return false. if(pathLen == initialPathLen) return false; // Otherwise, pop one node from the path and return true. popPath(1); return true; } // Current node is a left child. // If we are at the leftmost tree node then return false and don't move. if(p.moves + 3 == pathLen) { return false; } // Truncate path to the parent of the last right child popPath(p.moves + 1); return true; } // Go left int newMoves = p.nodeIsLeftChild ? p.moves + 1 : 1; const PathEntry *borrowSource = (left->flags & Node::PREFIX_SOURCE_NEXT) ? &p : &p - newMoves; pushPath(left, borrowSource, true, newMoves); // Go right as far as possible while(1) { const PathEntry &p = pathBack(); const Node *right = p.parser.rightChild(); if(right == nullptr) { return true; } int newMoves = p.nodeIsLeftChild ? 1 : p.moves + 1; const PathEntry *borrowSource = (right->flags & Node::PREFIX_SOURCE_NEXT) ? &p - newMoves : &p; pushPath(right, borrowSource, false, newMoves); } } }; Cursor getCursor(StringRef prevAncestor, StringRef nextAncestor) const { return (size != 0) ? Cursor(&root, prevAncestor, nextAncestor) : Cursor(); } static std::string escapeForDOT(StringRef s) { std::string r = "\""; for(char c : s) { if(c == '\n') r += "\\n"; else if(isprint(c) && c != '"') r += c; else r += format("{%02X}", c); } return r + '"'; } std::string toDOT(StringRef prevAncestor, StringRef nextAncestor) const { auto c = getCursor(prevAncestor, nextAncestor); c.moveFirst(); std::string r; r += format("digraph PrefixTree%p {\n", this); do { const PathEntry &p = c.pathBack(); const Node *n = p.node; const Node *left = p.parser.leftChild(); const Node *right = p.parser.rightChild(); std::string label = escapeForDOT(format("PrefixSource: %s\nPrefix: [%s]\nSplit: %s\nSuffix: %s", n->flags & Node::PREFIX_SOURCE_NEXT ? "Left" : "Right", p.getPrefix().toString().c_str(), p.parser.splitString().toString().c_str(), p.parser.suffixString().toString().c_str() )); r += format("node%p [ label = %s ];\nnode%p -> { %s %s };\n", n, label.c_str(), n, left ? format("node%p", left).c_str() : "", right ? format("node%p", right).c_str() : "" ); } while(c.moveNext()); r += "}\n"; return r; } // Returns number of bytes written int build(const EntryRef *begin, const EntryRef *end, StringRef prevAncestor, StringRef nextAncestor) { // The boundary leading to the new page acts as the last time we branched right if(begin == end) { size = 0; } else { size = sizeof(size) + build(root, begin, end, nextAncestor, prevAncestor); } ASSERT(size <= MaximumTreeSize()); return size; } private: static uint16_t build(Node &root, const EntryRef *begin, const EntryRef *end, const StringRef &nextAncestor, const StringRef &prevAncestor) { ASSERT(end != begin); int count = end - begin; // Find key to be stored in root int mid = perfectSubtreeSplitPointCached(count); const StringRef &key = begin[mid].key; const StringRef &val = begin[mid].value; // Since key must be between lastLeft and lastRight, any common prefix they share must be shared by key // so rather than comparing all of key to each one separately we can just compare lastLeft and lastRight // to each other and then skip over the resulting length in key int nextPrevCommon = commonPrefixLength(nextAncestor.begin(), prevAncestor.begin(), std::min(nextAncestor.size(), prevAncestor.size())); // Pointer to remainder of key after the left/right common bytes const uint8_t *keyExt = key.begin() + nextPrevCommon; // Find out how many bytes beyond leftRightCommon key has with each last left/right string separately int extNext = commonPrefixLength(keyExt, nextAncestor.begin() + nextPrevCommon, std::min(key.size(), nextAncestor.size()) - nextPrevCommon); int extPrev = commonPrefixLength(keyExt, prevAncestor.begin() + nextPrevCommon, std::min(key.size(), prevAncestor.size()) - nextPrevCommon); // Use the longer result bool prefixSourceNext = extNext > extPrev; int prefixLen = nextPrevCommon + (prefixSourceNext ? extNext : extPrev); int splitLen; // Bytes after prefix required to make traversal decision int suffixLen; // Remainder of key bytes after split key portion //printf("build: '%s'\n prefixLen %d prefixSourceNext %d\n", key.toHexString(20).c_str(), prefixLen, prefixSourceNext); // 2 entries or less means no right child, so just put all remaining key bytes into split string. if(count < 3) { splitLen = key.size() - prefixLen; suffixLen = 0; } else { // There are 2 children // Avoid using the suffix at all if the remainder is small enough. splitLen = key.size() - prefixLen; if(splitLen < SERVER_KNOBS->PREFIX_TREE_IMMEDIATE_KEY_SIZE_LIMIT) { suffixLen = 0; } else { // Remainder of the key was not small enough to put entirely before the left child, so find the actual required to make the branch decision const StringRef &prevKey = begin[mid - 1].key; splitLen = commonPrefixLength(key.begin(), prevKey.begin(), std::min(key.size(), prevKey.size())) + 1 - prefixLen; // Put at least the minimum immediate byte count in the split key (before the left child) if(splitLen < SERVER_KNOBS->PREFIX_TREE_IMMEDIATE_KEY_SIZE_MIN) splitLen = std::min(key.size() - prefixLen, SERVER_KNOBS->PREFIX_TREE_IMMEDIATE_KEY_SIZE_MIN); suffixLen = key.size() - splitLen - prefixLen; } } // We now know enough about the fields present and their lengths to set the flag bits and write a header // If any int is more than 8 bits then use large ints bool large = prefixLen > 255 || splitLen > 255 || suffixLen > 255 || val.size() > 255; root.flags = large ? Node::USE_LARGE_LENGTHS : 0; if(prefixSourceNext) root.flags |= Node::PREFIX_SOURCE_NEXT; union { uint8_t *p8; uint16_t *p16; }; p8 = &root.flags + 1; if(large) *p16++ = prefixLen; else *p8++ = prefixLen; if(splitLen > 0) { root.flags |= Node::HAS_SPLIT; if(large) *p16++ = splitLen; else *p8++ = splitLen; } uint16_t *pLeftLen = p16; if(count > 1) { ++p16; } if(suffixLen > 0) { root.flags |= Node::HAS_SUFFIX; if(large) *p16++ = suffixLen; else *p8++ = suffixLen; } if(val.size() > 0) { root.flags |= Node::HAS_VALUE; if(large) *p16++ = val.size(); else *p8++ = val.size(); } // Header is written, now write strings and children in order. const uint8_t *keyPtr = key.begin() + prefixLen; // Serialize split bytes if(splitLen > 0) { memcpy(p8, keyPtr, splitLen); p8 += splitLen; keyPtr += splitLen; } // Serialize left child if(count > 1) { root.flags |= Node::HAS_LEFT_CHILD; int leftLen = build(*(Node *)(p8), begin, begin + mid, key, prevAncestor); *pLeftLen = leftLen; p8 += leftLen; } // Serialize suffix bytes if(suffixLen > 0) { memcpy(p8, keyPtr, suffixLen); p8 += suffixLen; } // Serialize value bytes if(val.size() > 0) { memcpy(p8, val.begin(), val.size()); p8 += val.size(); } // Serialize right child if(count > 2) { root.flags |= Node::HAS_RIGHT_CHILD; int rightLen = build(*(Node *)(p8), begin + mid + 1, end, nextAncestor, key); p8 += rightLen; } /* printf("\nBuilt: key '%s' c %d p %d spl %d suf %d\nRaw: %s\n", key.toString().c_str(), count, prefixLen, splitLen, suffixLen, StringRef(&root.flags, p8 - &root.flags).toHexString(20).c_str()); Node::Parser p(&root); printf("parser: headerLen %d prefixLen %d leftPos %d rightPos %d split %s suffix %s val %s\n", p.headerLen, p.prefixLen, p.leftPos, p.rightPos, p.splitString().toString().c_str(), p.suffixString().toString().c_str(), p.valueString().toString().c_str()); */ return p8 - (uint8_t *)&root; } };