/* * SkipList.cpp * * This source file is part of the FoundationDB open source project * * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include "flow/Platform.h" #include "fdbrpc/fdbrpc.h" #include "fdbrpc/PerfMetric.h" #include "fdbclient/FDBTypes.h" #include "fdbclient/KeyRangeMap.h" #include "fdbclient/SystemData.h" #include "fdbserver/ConflictSet.h" static std::vector skc; static thread_local uint32_t g_seed = 0; static inline int skfastrand() { g_seed = g_seed * 1664525L + 1013904223L; return g_seed; } PerfDoubleCounter g_buildTest("Build", skc), g_add("Add", skc), g_detectConflicts("Detect", skc), g_sort("D.Sort", skc), g_combine("D.Combine", skc), g_checkRead("D.CheckRead", skc), g_checkBatch("D.CheckIntraBatch", skc), g_merge("D.MergeWrite", skc), g_removeBefore("D.RemoveBefore", skc); static force_inline int compare(const StringRef& a, const StringRef& b) { int c = memcmp(a.begin(), b.begin(), std::min(a.size(), b.size())); if (c < 0) return -1; if (c > 0) return +1; if (a.size() < b.size()) return -1; if (a.size() == b.size()) return 0; return +1; } struct ReadConflictRange { StringRef begin, end; Version version; int transaction; int indexInTx; VectorRef* conflictingKeyRange; Arena* cKRArena; ReadConflictRange(StringRef begin, StringRef end, Version version, int transaction, int indexInTx, VectorRef* cKR = nullptr, Arena* cKRArena = nullptr) : begin(begin), end(end), version(version), transaction(transaction), indexInTx(indexInTx), conflictingKeyRange(cKR), cKRArena(cKRArena) {} bool operator<(const ReadConflictRange& rhs) const { return compare(begin, rhs.begin) < 0; } }; struct KeyInfo { StringRef key; int* pIndex; bool begin; bool write; int transaction; KeyInfo() = default; KeyInfo(StringRef key, bool begin, bool write, int transaction, int* pIndex) : key(key), pIndex(pIndex), begin(begin), write(write), transaction(transaction) {} }; force_inline int extra_ordering(const KeyInfo& ki) { return ki.begin * 2 + (ki.write ^ ki.begin); } // returns true if done with string force_inline bool getCharacter(const KeyInfo& ki, int character, int& outputCharacter) { // normal case if (character < ki.key.size()) { outputCharacter = 5 + ki.key.begin()[character]; return false; } // termination if (character == ki.key.size()) { outputCharacter = 0; return false; } if (character == ki.key.size() + 1) { // end/begin+read/write relative sorting outputCharacter = extra_ordering(ki); return false; } outputCharacter = 0; return true; } bool operator<(const KeyInfo& lhs, const KeyInfo& rhs) { int i = std::min(lhs.key.size(), rhs.key.size()); int c = memcmp(lhs.key.begin(), rhs.key.begin(), i); if (c != 0) return c < 0; // Always sort shorter keys before longer keys. if (lhs.key.size() < rhs.key.size()) { return true; } if (lhs.key.size() > rhs.key.size()) { return false; } // When the keys are the same length, use the extra ordering constraint. return extra_ordering(lhs) < extra_ordering(rhs); } bool operator==(const KeyInfo& lhs, const KeyInfo& rhs) { return !(lhs < rhs || rhs < lhs); } void swapSort(std::vector& points, int a, int b) { if (points[b] < points[a]) { KeyInfo temp; temp = points[a]; points[a] = points[b]; points[b] = temp; } } void smallSort(std::vector& points, int start, int N) { for (int i = 1; i < N; i++) for (int j = i; j > 0; j -= 2) swapSort(points, start + j - 1, start + j); for (int i = N - 2; i > 0; i--) for (int j = i; j > 0; j -= 2) swapSort(points, start + j - 1, start + j); } struct SortTask { int begin; int size; int character; SortTask(int begin, int size, int character) : begin(begin), size(size), character(character) {} }; void sortPoints(std::vector& points) { std::vector tasks; std::vector newPoints; std::vector counts; tasks.emplace_back(0, points.size(), 0); while (tasks.size()) { SortTask st = tasks.back(); tasks.pop_back(); if (st.size < 10) { // smallSort(points, st.begin, st.size); std::sort(points.begin() + st.begin, points.begin() + st.begin + st.size); continue; } newPoints.resize(st.size); counts.assign(256 + 5, 0); // get counts int c; bool allDone = true; for (int i = st.begin; i < st.begin + st.size; i++) { allDone &= getCharacter(points[i], st.character, c); counts[c]++; } if (allDone) continue; // calculate offsets from counts and build next level of tasks int total = 0; for (int i = 0; i < counts.size(); i++) { int temp = counts[i]; if (temp > 1) tasks.emplace_back(st.begin + total, temp, st.character + 1); counts[i] = total; total += temp; } // put in their places for (int i = st.begin; i < st.begin + st.size; i++) { getCharacter(points[i], st.character, c); newPoints[counts[c]++] = points[i]; } // copy back into original points array for (int i = 0; i < st.size; i++) points[st.begin + i] = newPoints[i]; } } class SkipList : NonCopyable { private: static constexpr int MaxLevels = 26; int randomLevel() const { uint32_t i = uint32_t(skfastrand()) >> (32 - (MaxLevels - 1)); int level = 0; while (i & 1) { i >>= 1; level++; } ASSERT(level < MaxLevels); return level; } // Represent a node in the SkipList. The node has multiple (i.e., level) pointers to // other nodes, and keeps a record of the max versions for each level. struct Node { int level() const { return nPointers - 1; } uint8_t* value() { return end() + nPointers * (sizeof(Node*) + sizeof(Version)); } int length() const { return valueLength; } // Returns the next node pointer at the given level. Node* getNext(int level) { return *((Node**)end() + level); } // Sets the next node pointer at the given level. void setNext(int level, Node* n) { *((Node**)end() + level) = n; } // Returns the max version at the given level. Version getMaxVersion(int i) const { return ((Version*)(end() + nPointers * sizeof(Node*)))[i]; } // Sets the max version at the given level. void setMaxVersion(int i, Version v) { ((Version*)(end() + nPointers * sizeof(Node*)))[i] = v; } // Return a node with initialized value but uninitialized pointers // Memory layout: *this, (level+1) Node*, (level+1) Version, value static Node* create(const StringRef& value, int level) { int nodeSize = sizeof(Node) + value.size() + (level + 1) * (sizeof(Node*) + sizeof(Version)); Node* n; if (nodeSize <= 64) { n = (Node*)FastAllocator<64>::allocate(); INSTRUMENT_ALLOCATE("SkipListNode64"); } else if (nodeSize <= 128) { n = (Node*)FastAllocator<128>::allocate(); INSTRUMENT_ALLOCATE("SkipListNode128"); } else { n = (Node*)new char[nodeSize]; INSTRUMENT_ALLOCATE("SkipListNodeLarge"); } n->nPointers = level + 1; n->valueLength = value.size(); if (value.size() > 0) { memcpy(n->value(), value.begin(), value.size()); } return n; } // pre: level>0, all lower level nodes between this and getNext(level) have correct maxversions void calcVersionForLevel(int level) { Node* end = getNext(level); Version v = getMaxVersion(level - 1); for (Node* x = getNext(level - 1); x != end; x = x->getNext(level - 1)) v = std::max(v, x->getMaxVersion(level - 1)); setMaxVersion(level, v); } void destroy() { int nodeSize = getNodeSize(); if (nodeSize <= 64) { FastAllocator<64>::release(this); INSTRUMENT_RELEASE("SkipListNode64"); } else if (nodeSize <= 128) { FastAllocator<128>::release(this); INSTRUMENT_RELEASE("SkipListNode128"); } else { delete[](char*) this; INSTRUMENT_RELEASE("SkipListNodeLarge"); } } private: int getNodeSize() const { return sizeof(Node) + valueLength + nPointers * (sizeof(Node*) + sizeof(Version)); } // Returns the first Node* pointer uint8_t* end() { return (uint8_t*)(this + 1); } uint8_t const* end() const { return (uint8_t const*)(this + 1); } int nPointers, valueLength; }; static force_inline bool less(const uint8_t* a, int aLen, const uint8_t* b, int bLen) { int c = memcmp(a, b, std::min(aLen, bLen)); if (c < 0) return true; if (c > 0) return false; return aLen < bLen; } Node* header; void destroy() { Node *next, *x; for (x = header; x; x = next) { next = x->getNext(0); x->destroy(); } } public: // Points the location (i.e., Node*) that value would appear in the SkipList. // If the "value" is in the list, then finger[0] points to that exact node; // otherwise, the finger points to Nodes that the value should be inserted before. // Note the SkipList organizes all nodes at level 0, higher levels contain jump pointers. struct Finger { Node* finger[MaxLevels]; // valid for levels >= level int level = MaxLevels; Node* x = nullptr; Node* alreadyChecked = nullptr; StringRef value; Finger() = default; Finger(Node* header, const StringRef& ptr) : x(header), value(ptr) {} void init(const StringRef& value, Node* header) { this->value = value; x = header; alreadyChecked = nullptr; level = MaxLevels; } // pre: !finished() force_inline void prefetch() { Node* next = x->getNext(level - 1); if (next) { _mm_prefetch((const char*)next, _MM_HINT_T0); _mm_prefetch((const char*)next + 64, _MM_HINT_T0); } } // pre: !finished() // Advances the pointer at the current level to a Node that's >= finger's value // if possible; or move to the next level (i.e., level--). // Returns true if we have advanced to the next level force_inline bool advance() { Node* next = x->getNext(level - 1); if (next == alreadyChecked || !less(next->value(), next->length(), value.begin(), value.size())) { alreadyChecked = next; level--; finger[level] = x; return true; } else { x = next; return false; } } // pre: !finished() force_inline void nextLevel() { while (!advance()) ; } force_inline bool finished() const { return level == 0; } // Returns if the finger value is found in the SkipList. force_inline Node* found() const { // valid after finished returns true Node* n = finger[0]->getNext(0); // or alreadyChecked, but that is more easily invalidated if (n && n->length() == value.size() && !memcmp(n->value(), value.begin(), value.size())) return n; else return nullptr; } StringRef getValue() const { Node* n = finger[0]->getNext(0); return n ? StringRef(n->value(), n->length()) : StringRef(); } }; // Returns the total number of nodes in the list. int count() const { int count = 0; Node* x = header->getNext(0); while (x) { x = x->getNext(0); count++; } return count; } explicit SkipList(Version version = 0) { header = Node::create(StringRef(), MaxLevels - 1); for (int l = 0; l < MaxLevels; l++) { header->setNext(l, nullptr); header->setMaxVersion(l, version); } } ~SkipList() { destroy(); } SkipList(SkipList&& other) noexcept : header(other.header) { other.header = nullptr; } void operator=(SkipList&& other) noexcept { destroy(); header = other.header; other.header = nullptr; } void swap(SkipList& other) { std::swap(header, other.header); } void addConflictRanges(const Finger* fingers, int rangeCount, Version version) { for (int r = rangeCount - 1; r >= 0; r--) { const Finger& startF = fingers[r * 2]; const Finger& endF = fingers[r * 2 + 1]; if (endF.found() == nullptr) insert(endF, endF.finger[0]->getMaxVersion(0)); remove(startF, endF); insert(startF, version); } } void detectConflicts(ReadConflictRange* ranges, int count, bool* transactionConflictStatus) { const int M = 16; int nextJob[M]; CheckMax inProgress[M]; if (!count) return; int started = std::min(M, count); for (int i = 0; i < started; i++) { inProgress[i].init(ranges[i], header, transactionConflictStatus, ranges[i].indexInTx, ranges[i].conflictingKeyRange, ranges[i].cKRArena); nextJob[i] = i + 1; } nextJob[started - 1] = 0; int prevJob = started - 1; int job = 0; // vtune: 340 parts while (true) { if (inProgress[job].advance()) { if (started == count) { if (prevJob == job) break; nextJob[prevJob] = nextJob[job]; job = prevJob; } else { int temp = started++; inProgress[job].init(ranges[temp], header, transactionConflictStatus, ranges[temp].indexInTx, ranges[temp].conflictingKeyRange, ranges[temp].cKRArena); } } prevJob = job; job = nextJob[job]; } } // Splits the version history represented by this skiplist into separate key ranges // delimited by the given array of keys. This SkipList is left empty. this->partition // is intended to be followed by a call to this->concatenate() recombining the same // partitions. In between, operations on each partition must not touch any keys outside // the partition. Specifically, the partition to the left of 'key' must not have a range // [...,key) inserted, since that would insert an entry at 'key'. // Note this function is not used. void partition(StringRef* begin, int splitCount, SkipList* output) { for (int i = splitCount - 1; i >= 0; i--) { Finger f(header, begin[i]); while (!f.finished()) f.nextLevel(); split(f, output[i + 1]); } swap(output[0]); } // Concatenates multiple SkipList objects into one and stores in input[0]. // Note this function is not used. void concatenate(SkipList* input, int count) { std::vector ends(count - 1); for (int i = 0; i < ends.size(); i++) input[i].getEnd(ends[i]); for (int l = 0; l < MaxLevels; l++) { for (int i = ends.size() - 1; i >= 0; i--) { ends[i].finger[l]->setNext(l, input[i + 1].header->getNext(l)); if (l && (!i || ends[i].finger[l] != input[i].header)) ends[i].finger[l]->calcVersionForLevel(l); input[i + 1].header->setNext(l, nullptr); } } swap(input[0]); } void find(const StringRef* values, Finger* results, int* temp, int count) { // Relying on the ordering of values, descend until the values aren't all in the // same part of the tree // vtune: 11 parts results[0].init(values[0], header); const StringRef& endValue = values[count - 1]; while (results[0].level > 1) { results[0].nextLevel(); Node* ac = results[0].alreadyChecked; if (ac && less(ac->value(), ac->length(), endValue.begin(), endValue.size())) break; } // Init all the other fingers to start descending where we stopped // the first one // SOMEDAY: this loop showed up on vtune, could be faster? // vtune: 8 parts int startLevel = results[0].level + 1; Node* x = startLevel < MaxLevels ? results[0].finger[startLevel] : header; for (int i = 1; i < count; i++) { results[i].level = startLevel; results[i].x = x; results[i].alreadyChecked = nullptr; results[i].value = values[i]; for (int j = startLevel; j < MaxLevels; j++) results[i].finger[j] = results[0].finger[j]; } int* nextJob = temp; for (int i = 0; i < count - 1; i++) nextJob[i] = i + 1; nextJob[count - 1] = 0; int prevJob = count - 1; int job = 0; // vtune: 225 parts while (true) { Finger* f = &results[job]; f->advance(); if (f->finished()) { if (prevJob == job) break; nextJob[prevJob] = nextJob[job]; } else { f->prefetch(); prevJob = job; } job = nextJob[job]; } } int removeBefore(Version v, Finger& f, int nodeCount) { // f.x, f.alreadyChecked? int removedCount = 0; bool wasAbove = true; while (nodeCount--) { Node* x = f.finger[0]->getNext(0); if (!x) break; // double prefetch gives +25% speed (single threaded) Node* next = x->getNext(0); _mm_prefetch((const char*)next, _MM_HINT_T0); next = x->getNext(1); _mm_prefetch((const char*)next, _MM_HINT_T0); bool isAbove = x->getMaxVersion(0) >= v; if (isAbove || wasAbove) { // f.nextItem for (int l = 0; l <= x->level(); l++) f.finger[l] = x; } else { // f.eraseItem removedCount++; for (int l = 0; l <= x->level(); l++) f.finger[l]->setNext(l, x->getNext(l)); for (int i = 1; i <= x->level(); i++) f.finger[i]->setMaxVersion(i, std::max(f.finger[i]->getMaxVersion(i), x->getMaxVersion(i))); x->destroy(); } wasAbove = isAbove; } return removedCount; } private: void remove(const Finger& start, const Finger& end) { if (start.finger[0] == end.finger[0]) return; Node* x = start.finger[0]->getNext(0); // vtune says: this loop is the expensive parts (6 parts) for (int i = 0; i < MaxLevels; i++) if (start.finger[i] != end.finger[i]) start.finger[i]->setNext(i, end.finger[i]->getNext(i)); while (true) { Node* next = x->getNext(0); x->destroy(); if (x == end.finger[0]) break; x = next; } } void insert(const Finger& f, Version version) { int level = randomLevel(); // std::cout << std::string((const char*)value,length) << " level: " << level << std::endl; Node* x = Node::create(f.value, level); x->setMaxVersion(0, version); for (int i = 0; i <= level; i++) { x->setNext(i, f.finger[i]->getNext(i)); f.finger[i]->setNext(i, x); } // vtune says: this loop is the costly part of this function for (int i = 1; i <= level; i++) { f.finger[i]->calcVersionForLevel(i); x->calcVersionForLevel(i); } for (int i = level + 1; i < MaxLevels; i++) { Version v = f.finger[i]->getMaxVersion(i); if (v >= version) break; f.finger[i]->setMaxVersion(i, version); } } void insert(const StringRef& value, Version version) { Finger f(header, value); while (!f.finished()) f.nextLevel(); // SOMEDAY: equality? insert(f, version); } struct CheckMax { Finger start, end; Version version; bool* result; int state; int indexInTx; VectorRef* conflictingKeyRange; // nullptr if report_conflicting_keys is not enabled. Arena* cKRArena; // nullptr if report_conflicting_keys is not enabled. void init(const ReadConflictRange& r, Node* header, bool* tCS, int indexInTx, VectorRef* cKR, Arena* cKRArena) { this->start.init(r.begin, header); this->end.init(r.end, header); this->version = r.version; this->indexInTx = indexInTx; this->cKRArena = cKRArena; result = &tCS[r.transaction]; conflictingKeyRange = cKR; this->state = 0; } bool noConflict() const { return true; } bool conflict() { *result = true; if (conflictingKeyRange != nullptr) conflictingKeyRange->push_back(*cKRArena, indexInTx); return true; } // Return true if finished force_inline bool advance() { switch (state) { case 0: // find where start and end fingers diverge while (true) { if (!start.advance()) { start.prefetch(); return false; } end.x = start.x; while (!end.advance()) ; int l = start.level; if (start.finger[l] != end.finger[l]) break; // accept if the range spans the check range, but does not have a greater version if (start.finger[l]->getMaxVersion(l) <= version) return noConflict(); if (l == 0) return conflict(); } state = 1; case 1: { // check the end side of the pyramid Node* e = end.finger[end.level]; while (e->getMaxVersion(end.level) > version) { if (end.finished()) return conflict(); end.nextLevel(); Node* f = end.finger[end.level]; while (e != f) { if (e->getMaxVersion(end.level) > version) return conflict(); e = e->getNext(end.level); } } // check the start side of the pyramid Node* s = end.finger[start.level]; while (true) { Node* nextS = start.finger[start.level]->getNext(start.level); Node* p = nextS; while (p != s) { if (p->getMaxVersion(start.level) > version) return conflict(); p = p->getNext(start.level); } if (start.finger[start.level]->getMaxVersion(start.level) <= version) return noConflict(); s = nextS; if (start.finished()) { if (nextS->length() == start.value.size() && !memcmp(nextS->value(), start.value.begin(), start.value.size())) return noConflict(); else return conflict(); } start.nextLevel(); } } default: __assume(false); } } }; // Splits the SkipLists so that those after finger is moved to "right". void split(const Finger& f, SkipList& right) { ASSERT(!right.header->getNext(0)); // right must be empty right.header->setMaxVersion(0, f.finger[0]->getMaxVersion(0)); for (int l = 0; l < MaxLevels; l++) { right.header->setNext(l, f.finger[l]->getNext(l)); f.finger[l]->setNext(l, nullptr); } } // Sets end's finger to the last nodes at all levels. void getEnd(Finger& end) { Node* node = header; for (int l = MaxLevels - 1; l >= 0; l--) { Node* next; while ((next = node->getNext(l)) != nullptr) node = next; end.finger[l] = node; } end.level = 0; } }; struct ConflictSet { ConflictSet() : removalKey(makeString(0)), oldestVersion(0) {} ~ConflictSet() {} SkipList versionHistory; Key removalKey; Version oldestVersion; }; ConflictSet* newConflictSet() { return new ConflictSet; } void clearConflictSet(ConflictSet* cs, Version v) { SkipList(v).swap(cs->versionHistory); } void destroyConflictSet(ConflictSet* cs) { delete cs; } ConflictBatch::ConflictBatch(ConflictSet* cs, std::map>* conflictingKeyRangeMap, Arena* resolveBatchReplyArena) : cs(cs), transactionCount(0), conflictingKeyRangeMap(conflictingKeyRangeMap), resolveBatchReplyArena(resolveBatchReplyArena) {} ConflictBatch::~ConflictBatch() {} struct TransactionInfo { VectorRef> readRanges; VectorRef> writeRanges; bool tooOld; bool reportConflictingKeys; }; void ConflictBatch::addTransaction(const CommitTransactionRef& tr) { const int t = transactionCount++; Arena& arena = transactionInfo.arena(); TransactionInfo* info = new (arena) TransactionInfo; info->reportConflictingKeys = tr.report_conflicting_keys; if (tr.read_snapshot < cs->oldestVersion && tr.read_conflict_ranges.size()) { info->tooOld = true; } else { info->tooOld = false; info->readRanges.resize(arena, tr.read_conflict_ranges.size()); info->writeRanges.resize(arena, tr.write_conflict_ranges.size()); for (int r = 0; r < tr.read_conflict_ranges.size(); r++) { const KeyRangeRef& range = tr.read_conflict_ranges[r]; points.emplace_back(range.begin, true, false, t, &info->readRanges[r].first); points.emplace_back(range.end, false, false, t, &info->readRanges[r].second); combinedReadConflictRanges.emplace_back(range.begin, range.end, tr.read_snapshot, t, r, tr.report_conflicting_keys ? &(*conflictingKeyRangeMap)[t] : nullptr, tr.report_conflicting_keys ? resolveBatchReplyArena : nullptr); } for (int r = 0; r < tr.write_conflict_ranges.size(); r++) { const KeyRangeRef& range = tr.write_conflict_ranges[r]; points.emplace_back(range.begin, true, true, t, &info->writeRanges[r].first); points.emplace_back(range.end, false, true, t, &info->writeRanges[r].second); } } transactionInfo.push_back(arena, info); } // SOMEDAY: This should probably be replaced with a roaring bitmap. class MiniConflictSet : NonCopyable { std::vector values; public: explicit MiniConflictSet(int size) { values.assign(size, false); } void set(int begin, int end) { for (int i = begin; i < end; i++) values[i] = true; } bool any(int begin, int end) { for (int i = begin; i < end; i++) if (values[i]) return true; return false; } }; void ConflictBatch::checkIntraBatchConflicts() { int index = 0; for (int p = 0; p < points.size(); p++) *points[p].pIndex = index++; MiniConflictSet mcs(index); for (int t = 0; t < transactionInfo.size(); t++) { const TransactionInfo& tr = *transactionInfo[t]; if (transactionConflictStatus[t]) continue; bool conflict = tr.tooOld; for (int i = 0; i < tr.readRanges.size(); i++) { if (mcs.any(tr.readRanges[i].first, tr.readRanges[i].second)) { if (tr.reportConflictingKeys) { (*conflictingKeyRangeMap)[t].push_back(*resolveBatchReplyArena, i); } conflict = true; break; } } transactionConflictStatus[t] = conflict; if (!conflict) for (int i = 0; i < tr.writeRanges.size(); i++) mcs.set(tr.writeRanges[i].first, tr.writeRanges[i].second); } } void ConflictBatch::GetTooOldTransactions(std::vector& tooOldTransactions) { for (int i = 0; i < transactionInfo.size(); i++) { if (transactionInfo[i]->tooOld) { tooOldTransactions.push_back(i); } } } void ConflictBatch::detectConflicts(Version now, Version newOldestVersion, std::vector& nonConflicting, std::vector* tooOldTransactions) { double t = timer(); sortPoints(points); g_sort += timer() - t; transactionConflictStatus = new bool[transactionCount]; memset(transactionConflictStatus, 0, transactionCount * sizeof(bool)); t = timer(); checkReadConflictRanges(); g_checkRead += timer() - t; t = timer(); checkIntraBatchConflicts(); g_checkBatch += timer() - t; t = timer(); combineWriteConflictRanges(); g_combine += timer() - t; t = timer(); mergeWriteConflictRanges(now); g_merge += timer() - t; for (int i = 0; i < transactionCount; i++) { if (tooOldTransactions && transactionInfo[i]->tooOld) { tooOldTransactions->push_back(i); } else if (!transactionConflictStatus[i]) { nonConflicting.push_back(i); } } delete[] transactionConflictStatus; t = timer(); if (newOldestVersion > cs->oldestVersion) { cs->oldestVersion = newOldestVersion; SkipList::Finger finger; int temp; cs->versionHistory.find(&cs->removalKey, &finger, &temp, 1); cs->versionHistory.removeBefore(cs->oldestVersion, finger, combinedWriteConflictRanges.size() * 3 + 10); cs->removalKey = finger.getValue(); } g_removeBefore += timer() - t; } void ConflictBatch::checkReadConflictRanges() { if (combinedReadConflictRanges.empty()) return; cs->versionHistory.detectConflicts( &combinedReadConflictRanges[0], combinedReadConflictRanges.size(), transactionConflictStatus); } void ConflictBatch::addConflictRanges(Version now, std::vector>::iterator begin, std::vector>::iterator end, SkipList* part) { const int count = end - begin; static_assert(sizeof(*begin) == sizeof(StringRef) * 2, "Write Conflict Range type not convertible to two StringPtrs"); const StringRef* strings = reinterpret_cast(&*begin); const int stringCount = count * 2; const int stripeSize = 16; SkipList::Finger fingers[stripeSize]; int temp[stripeSize]; int stripes = (stringCount + stripeSize - 1) / stripeSize; int ss = stringCount - (stripes - 1) * stripeSize; for (int s = stripes - 1; s >= 0; s--) { part->find(&strings[s * stripeSize], fingers, temp, ss); part->addConflictRanges(fingers, ss / 2, now); ss = stripeSize; } } void ConflictBatch::mergeWriteConflictRanges(Version now) { if (combinedWriteConflictRanges.empty()) return; addConflictRanges(now, combinedWriteConflictRanges.begin(), combinedWriteConflictRanges.end(), &cs->versionHistory); } void ConflictBatch::combineWriteConflictRanges() { int activeWriteCount = 0; for (const KeyInfo& point : points) { if (point.write && !transactionConflictStatus[point.transaction]) { if (point.begin) { activeWriteCount++; if (activeWriteCount == 1) combinedWriteConflictRanges.emplace_back(point.key, KeyRef()); } else /*if (point.end)*/ { activeWriteCount--; if (activeWriteCount == 0) combinedWriteConflictRanges.back().second = point.key; } } } } namespace { StringRef setK(Arena& arena, int i) { char t[sizeof(i)]; *(int*)t = i; const int keySize = 16; char* ss = new (arena) char[keySize]; for (int c = 0; c < keySize - sizeof(i); c++) ss[c] = '.'; for (int c = 0; c < sizeof(i); c++) ss[c + keySize - sizeof(i)] = t[sizeof(i) - 1 - c]; return StringRef((const uint8_t*)ss, keySize); } void miniConflictSetTest() { for (int i = 0; i < 2000000; i++) { int size = 64 * 5; // Also run 64*64*5 to test multiple words of andValues and orValues MiniConflictSet mini(size); for (int j = 0; j < 2; j++) { int a = deterministicRandom()->randomInt(0, size); int b = deterministicRandom()->randomInt(a, size); mini.set(a, b); } for (int j = 0; j < 4; j++) { int a = deterministicRandom()->randomInt(0, size); int b = deterministicRandom()->randomInt(a, size); mini.any(a, b); // Tests correctness internally } } printf("miniConflictSetTest complete\n"); } void operatorLessThanTest() { { // Longer strings before shorter strings. KeyInfo a(LiteralStringRef("hello"), /*begin=*/false, /*write=*/true, 0, nullptr); KeyInfo b(LiteralStringRef("hello\0"), /*begin=*/false, /*write=*/false, 0, nullptr); ASSERT(a < b); ASSERT(!(b < a)); ASSERT(!(a == b)); } { // Reads before writes. KeyInfo a(LiteralStringRef("hello"), /*begin=*/false, /*write=*/false, 0, nullptr); KeyInfo b(LiteralStringRef("hello"), /*begin=*/false, /*write=*/true, 0, nullptr); ASSERT(a < b); ASSERT(!(b < a)); ASSERT(!(a == b)); } { // Begin reads after writes. KeyInfo a(LiteralStringRef("hello"), /*begin=*/false, /*write=*/true, 0, nullptr); KeyInfo b(LiteralStringRef("hello"), /*begin=*/true, /*write=*/false, 0, nullptr); ASSERT(a < b); ASSERT(!(b < a)); ASSERT(!(a == b)); } { // Begin writes after writes. KeyInfo a(LiteralStringRef("hello"), /*begin=*/false, /*write=*/true, 0, nullptr); KeyInfo b(LiteralStringRef("hello"), /*begin=*/true, /*write=*/true, 0, nullptr); ASSERT(a < b); ASSERT(!(b < a)); ASSERT(!(a == b)); } } } // namespace void skipListTest() { printf("Skip list test\n"); miniConflictSetTest(); operatorLessThanTest(); setAffinity(0); double start; ConflictSet* cs = newConflictSet(); Arena testDataArena; VectorRef> testData; const int batches = 500; // deterministicRandom()->randomInt(500, 5000); const int data_per_batch = 5000; testData.resize(testDataArena, batches); std::vector> success(batches); std::vector> success2(batches); for (int i = 0; i < batches; i++) { testData[i].resize(testDataArena, data_per_batch); success[i].assign(data_per_batch, false); success2[i].assign(data_per_batch, false); for (int j = 0; j < data_per_batch; j++) { int key = deterministicRandom()->randomInt(0, 20000000); int key2 = key + 1 + deterministicRandom()->randomInt(0, 10); testData[i][j] = KeyRangeRef(setK(testDataArena, key), setK(testDataArena, key2)); } } printf("Test data generated: %d batches, %d/batch\n", batches, data_per_batch); printf("Running\n"); int readCount = 1, writeCount = 1; int cranges = 0, tcount = 0; start = timer(); std::vector> nonConflict(batches); Version version = 0; for (const auto& data : testData) { Arena buf; std::vector trs; double t = timer(); for (int j = 0; j + readCount + writeCount <= data.size(); j += readCount + writeCount) { CommitTransactionRef tr; for (int k = 0; k < readCount; k++) { KeyRangeRef r(buf, data[j + k]); tr.read_conflict_ranges.push_back(buf, r); } for (int k = 0; k < writeCount; k++) { KeyRangeRef r(buf, data[j + readCount + k]); tr.write_conflict_ranges.push_back(buf, r); } cranges += tr.read_conflict_ranges.size() + tr.write_conflict_ranges.size(); tr.read_snapshot = version; trs.push_back(tr); } tcount += trs.size(); g_buildTest += timer() - t; t = timer(); ConflictBatch batch(cs); for (const auto& tr : trs) { batch.addTransaction(tr); } g_add += timer() - t; t = timer(); batch.detectConflicts(version + 50, version, nonConflict[version]); g_detectConflicts += timer() - t; version++; } double elapsed = timer() - start; printf("New conflict set: %0.3f sec\n", elapsed); printf(" %0.3f Mtransactions/sec\n", tcount / elapsed / 1e6); printf(" %0.3f Mkeys/sec\n", cranges * 2 / elapsed / 1e6); elapsed = g_detectConflicts.getValue(); printf("Detect only: %0.3f sec\n", elapsed); printf(" %0.3f Mtransactions/sec\n", tcount / elapsed / 1e6); printf(" %0.3f Mkeys/sec\n", cranges * 2 / elapsed / 1e6); elapsed = g_checkRead.getValue() + g_merge.getValue(); printf("Skiplist only: %0.3f sec\n", elapsed); printf(" %0.3f Mtransactions/sec\n", tcount / elapsed / 1e6); printf(" %0.3f Mkeys/sec\n", cranges * 2 / elapsed / 1e6); printf("Performance counters:\n"); for (const auto& counter : skc) { printf("%20s: %s\n", counter->getMetric().name().c_str(), counter->getMetric().formatted().c_str()); } printf("%d entries in version history\n", cs->versionHistory.count()); }