#
# vector.py
#
# This source file is part of the FoundationDB open source project
#
# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""FoundationDB Vector Layer.

Provides the Vector() class for storing and manipulating arrays
in FoundationDB.
"""

import fdb
import fdb.tuple

import threading

fdb.api_version(22)


###################################
# This defines a Subspace of keys #
###################################

class Subspace(object):
    """A key namespace: every key it builds starts with one fixed raw prefix.

    The prefix is ``rawPrefix`` followed by the tuple-packed ``prefixTuple``.
    """

    def __init__(self, prefixTuple, rawPrefix=""):
        """Build the subspace prefix from an optional raw prefix and a tuple."""
        packed_prefix = fdb.tuple.pack(prefixTuple)
        self.rawPrefix = rawPrefix + packed_prefix

    def __getitem__(self, name):
        """Return the nested Subspace obtained by appending ``name`` to this prefix."""
        return Subspace((name,), self.rawPrefix)

    def key(self):
        """Return the raw prefix shared by every key in this subspace."""
        return self.rawPrefix

    def pack(self, tuple):
        """Return the key for ``tuple``: this prefix plus the tuple-packed value."""
        suffix = fdb.tuple.pack(tuple)
        return self.rawPrefix + suffix

    def unpack(self, key):
        """Strip this subspace's prefix from ``key`` and return the unpacked tuple.

        Asserts that ``key`` actually begins with the prefix.
        """
        prefix = self.rawPrefix
        assert key.startswith(prefix)
        remainder = key[len(prefix):]
        return fdb.tuple.unpack(remainder)

    def range(self, tuple=()):
        """Return a slice whose start/stop are the prefixed bounds of ``fdb.tuple.range(tuple)``."""
        prefix = self.rawPrefix
        bounds = fdb.tuple.range(tuple)
        return slice(prefix + bounds.start, prefix + bounds.stop)


########################
# _ImplicitTransaction #
########################

# A local class which is used to allow vector operations to be performed without
# explicitly passing a transaction. It is created by vector.use_transaction
# and is used as follows:
#
# with vector.use_transaction(tr):
#     vector[0] = 1
#     vector.push(1)
#     ...

class _ImplicitTransaction(object):
    """Context manager that binds a transaction to a Vector inside a with block.

    While the block is active, Vector operations called without an explicit
    ``tr`` argument use the bound transaction. On exit the binding that was in
    place when this object was constructed is restored.
    """

    def __init__(self, vector, tr):
        self.vector = vector
        self.tr = tr
        # The 'tr' attribute of the thread-local is only created in the thread
        # that constructed the Vector, so read it defensively everywhere else.
        self.initialValue = getattr(self.vector.local, 'tr', None)

    def __enter__(self):
        """Bind self.tr to the vector.

        Raises Exception if a transaction was already bound when this object
        was created and the currently bound transaction is not self.tr.
        """
        bound = getattr(self.vector.local, 'tr', None)
        if self.initialValue is not None and bound != self.tr:
            raise Exception('use_transaction cannot be nested')

        self.vector.local.tr = self.tr

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Restore the binding captured at construction time."""
        self.vector.local.tr = self.initialValue


##########
# Vector #
##########

# Vector stores each of its values using its index as the key.
# The size of a vector is equal to the index of its last key + 1.
#
# For indexes smaller than the vector's size that have no associated key
# in the database, the value will be the specified defaultValue.
#
# If the last value in the vector has the default value, its key will
# always be set so that size can be determined.
#
# By creating Vector with a Subspace, all kv pairs modified by the
# layer will have keys that start within that Subspace.

class Vector(object):
    """Represents a potentially sparse array in FoundationDB."""

    # Public functions

    def __init__(self, subspace, defaultValue=''):
        """Create a vector stored under ``subspace``.

        ``defaultValue`` is what sparsely represented (key-less) elements
        read as.
        """
        self.subspace = subspace
        self.defaultValue = defaultValue
        # Holds the transaction bound by use_transaction, one slot per thread.
        self.local = threading.local()
        self.local.tr = None

    def use_transaction(self, tr):
        """
        Get an object that can be used in a with statement to perform operations
        on this vector without supplying a transaction as an argument to each operation.

        For example:

        with vector.use_transaction(tr):
            vector[0] = 1
            vector.push(1)
            ...
        """
        return _ImplicitTransaction(self, tr)

    def size(self, tr=None):
        """Get the number of items in the Vector. This number includes the sparsely represented items."""
        return self._size(self._to_transaction(tr))

    def push(self, val, tr=None):
        """Push a single item onto the end of the Vector."""
        self._push(val, self._to_transaction(tr))

    def back(self, tr=None):
        """Get the value of the last item in the Vector (None if the Vector is empty)."""
        return self._back(self._to_transaction(tr))

    def front(self, tr=None):
        """Get the value of the first item in the Vector."""
        return self._get(0, self._to_transaction(tr))

    def pop(self, tr=None):
        """Remove and return the last item of the Vector (None if the Vector is empty)."""
        return self._pop(self._to_transaction(tr))

    def swap(self, i1, i2, tr=None):
        """Swap the items at positions i1 and i2. Raises IndexError for out-of-range indices."""
        self._swap(i1, i2, self._to_transaction(tr))

    def get(self, index, tr=None):
        """Get the item at the specified index. Raises IndexError if it is out of range."""
        return self._get(index, self._to_transaction(tr))

    def get_range(self, startIndex=None, endIndex=None, step=None, tr=None):
        """Get a range of items in the Vector, returned as a generator."""
        return self._get_range(startIndex, endIndex, step, self._to_transaction(tr))

    def set(self, index, val, tr=None):
        """Set the value at a particular index in the Vector."""
        self._set(index, val, self._to_transaction(tr))

    def empty(self, tr=None):
        """Test whether the Vector is empty."""
        return self._size(self._to_transaction(tr)) == 0

    def resize(self, length, tr=None):
        """Grow or shrink the size of the Vector."""
        self._resize(length, self._to_transaction(tr))

    def clear(self, tr=None):
        """Remove all items from the Vector."""
        self._clear(self._to_transaction(tr))

    # Vector supports array notation when combined with use_transaction

    def __setitem__(self, index, val):
        """Set the item at the specified index. Can only be used with use_transaction."""
        self.set(index, val)

    def __getitem__(self, index):
        """
        Get the item(s) at the specified index or range. Ranges are returned as a generator.
        Can only be used with use_transaction()
        """
        if isinstance(index, slice):
            return self.get_range(index.start, index.stop, index.step)
        return self.get(index)

    # Private functions

    @fdb.transactional
    def _push(self, val, tr):
        """Store val at index == current size."""
        # Use the transaction we were handed; the public size() would require
        # an implicit transaction to be bound and fail for explicit-tr callers.
        tr[self._key_at(self._size(tr))] = fdb.tuple.pack((val,))

    @fdb.transactional
    def _back(self, tr):
        """Return the value stored under the last key, or None if there are no keys."""
        keyRange = self.subspace.range()
        # limit 1, reverse: only the highest key of the subspace is read
        last = tr.get_range(keyRange.start, keyRange.stop, 1, True)
        for k, v in last:
            return fdb.tuple.unpack(v)[0]
        return None

    @fdb.transactional
    def _pop(self, tr):
        """Delete the last key and return its value; None if the vector is empty."""
        keyRange = self.subspace.range()

        # Read the last two entries so we can check if the second to last item
        # is being represented sparsely. If so, we will be required to set it
        # to the default value
        lastTwo = list(tr.get_range(keyRange.start, keyRange.stop, 2, True))

        # Vector was empty
        if not lastTwo:
            return None

        indices = [self.subspace.unpack(kv.key)[0] for kv in lastTwo]
        lastIndex = indices[0]

        # A vector of size one needs no new terminal key. Otherwise, if the
        # second to last item is sparse, materialize it so the size stays
        # discoverable from the last key.
        if lastIndex != 0 and (len(lastTwo) == 1 or lastIndex > indices[1] + 1):
            tr[self._key_at(lastIndex - 1)] = fdb.tuple.pack((self.defaultValue,))

        del tr[lastTwo[0].key]
        return fdb.tuple.unpack(lastTwo[0].value)[0]

    @fdb.transactional
    def _swap(self, i1, i2, tr):
        """Exchange the elements at i1 and i2, keeping sparse elements sparse.

        Raises IndexError when either index is negative or not smaller than
        the current size.
        """
        currentSize = self._size(tr)

        # Valid indices are 0 .. currentSize - 1; an index equal to the size
        # would otherwise silently grow the vector.
        if i1 >= currentSize or i2 >= currentSize or i1 < 0 or i2 < 0:
            raise IndexError('vector.swap: indices (%d, %d) out of range' % (i1, i2))

        k1 = self._key_at(i1)
        k2 = self._key_at(i2)
        v1 = tr[k1]
        v2 = tr[k2]

        # Resolve presence before writing anything
        has1 = v1.present()
        has2 = v2.present()
        lastIndex = currentSize - 1
        default = fdb.tuple.pack((self.defaultValue,))

        if has2:
            tr[k1] = v2
        elif has1:
            # Position i1 receives a sparse (default) element. The last key
            # must stay present so the size can still be determined.
            if i1 < lastIndex:
                del tr[k1]
            else:
                tr[k1] = default

        if has1:
            tr[k2] = v1
        elif has2:
            if i2 < lastIndex:
                del tr[k2]
            else:
                tr[k2] = default

    @fdb.transactional
    def _get(self, index, tr):
        """Return the element at index; IndexError if negative or past the end."""
        if index < 0:
            raise IndexError('vector.get: index \'%d\' out of range' % index)

        start = self._key_at(index)
        end = self.subspace.range().stop

        # First key at or after the requested index (limit 1)
        output = tr.get_range(start, end, 1)
        for k, v in output:
            # The requested index had an associated key
            if start == k:
                return fdb.tuple.unpack(v)[0]

            # The requested index is sparsely represented
            return self.defaultValue

        # We requested a value past the end of the vector
        raise IndexError('vector.get: index \'%d\' out of range' % index)

    def _get_range(self, startIndex, endIndex, step, tr):
        """Generate the elements from startIndex up to (not including) endIndex.

        Negative indices are taken relative to the size and clamped at 0.
        When step is None it is 1, or -1 if startIndex > endIndex. A positive
        step walks forward from startIndex; a negative step walks backward
        from startIndex down to, but not including, endIndex. Sparse elements
        inside the requested window yield defaultValue.

        Because this is a generator, the ValueError for a zero step is raised
        when iteration starts, not when the generator is created.
        """
        size = self._size(tr)
        if startIndex is not None and startIndex < 0:
            startIndex = max(0, size + startIndex)
        if endIndex is not None and endIndex < 0:
            endIndex = max(0, size + endIndex)

        if step is None:
            if startIndex is None or endIndex is None or startIndex <= endIndex:
                step = 1
            else:
                step = -1
        elif step == 0:
            raise ValueError('vector.get_range: step cannot be zero')

        forward = step > 0
        keyRange = self.subspace.range()

        # Translate the index window into a key window. For a reverse scan
        # startIndex is the inclusive upper bound and endIndex the exclusive
        # lower bound, hence the + 1 on both.
        if forward:
            start = keyRange.start if startIndex is None else self._key_at(startIndex)
            end = keyRange.stop if endIndex is None else self._key_at(endIndex)
        else:
            start = keyRange.start if endIndex is None else self._key_at(endIndex + 1)
            end = keyRange.stop if startIndex is None else self._key_at(startIndex + 1)

        result = tr.get_range(start, end, 0, not forward)

        if startIndex is None:
            currentIndex = 0 if forward else size - 1
        elif not forward and startIndex >= size:
            # A reverse walk starting past the end begins at the last element
            currentIndex = size - 1
        else:
            currentIndex = startIndex

        for k, v in result:
            keyIndex = self.subspace.unpack(k)[0]
            # Emit defaults for the sparse gap before this key
            while (forward and currentIndex < keyIndex) or (not forward and currentIndex > keyIndex):
                currentIndex = currentIndex + step
                yield self.defaultValue

            if currentIndex == keyIndex:
                currentIndex = currentIndex + step
                yield fdb.tuple.unpack(v)[0]

        # Sparse elements that lie after the last stored key of the window
        # (e.g. [0:3] of a vector whose only key is index 4) have no key to
        # trigger the gap fill above, so emit them here.
        if forward:
            limit = size if endIndex is None else min(endIndex, size)
            while currentIndex < limit:
                currentIndex = currentIndex + step
                yield self.defaultValue
        else:
            limit = -1 if endIndex is None else endIndex
            while currentIndex > limit:
                currentIndex = currentIndex + step
                yield self.defaultValue

    @fdb.transactional
    def _size(self, tr):
        """Return last stored index + 1, or 0 when the subspace holds no keys."""
        keyRange = self.subspace.range()
        lastKey = tr.get_key(fdb.KeySelector.last_less_or_equal(keyRange.stop))
        # The selector resolved to a key before our subspace: no elements
        if lastKey < keyRange.start:
            return 0

        return self.subspace.unpack(lastKey)[0] + 1

    @fdb.transactional
    def _set(self, index, val, tr):
        """Store val at index. Raises IndexError for a negative index."""
        # A negative index would create a key ordered before index 0 that
        # _get can never read back.
        if index < 0:
            raise IndexError('vector.set: index \'%d\' out of range' % index)
        tr[self._key_at(index)] = fdb.tuple.pack((val,))

    @fdb.transactional
    def _resize(self, length, tr):
        """Shrink or expand the vector so that its size equals length."""
        # Read the size through the supplied transaction (see _push)
        currentSize = self._size(tr)

        if length == currentSize:
            return
        if length < currentSize:
            self._shrink(tr, length, currentSize)
        else:
            self._expand(tr, length, currentSize)

    @fdb.transactional
    def _shrink(self, tr, length, currentSize):
        """Drop every key at index >= length, re-adding a terminal key if needed."""
        tr.clear_range(self._key_at(length), self.subspace.range().stop)

        # Check if the new end of the vector was being sparsely represented
        if self._size(tr) < length:
            tr[self._key_at(length - 1)] = fdb.tuple.pack((self.defaultValue,))

    @fdb.transactional
    def _expand(self, tr, length, currentSize):
        """Grow the vector by writing the default value at index length - 1."""
        tr[self._key_at(length - 1)] = fdb.tuple.pack((self.defaultValue,))

    @fdb.transactional
    def _clear(self, tr):
        """Delete every key in the vector's subspace."""
        del tr[self.subspace.range()]

    def _to_transaction(self, tr):
        """Return tr, or the transaction bound by use_transaction when tr is None.

        Raises Exception if tr is None and no transaction is bound in the
        calling thread.
        """
        if tr is not None:
            return tr

        # The attribute only exists in threads that assigned it
        implicit = getattr(self.local, 'tr', None)
        if implicit is None:
            raise Exception('No transaction specified and use_transaction has not been called')
        return implicit

    def _key_at(self, index):
        """Return the database key for the element at index."""
        return self.subspace.pack((index,))


##################
# internal tests #
##################

# caution: modifies the database!
@fdb.transactional
def vector_test(tr):
    """Exercise the Vector operations against the 'test_vector' subspace, printing results."""
    vector = Vector(Subspace(('test_vector',)), 0)

    def show_with_size():
        _print_vector(vector, tr)
        print('Size: %s' % (vector.size(),))

    with vector.use_transaction(tr):
        print('Clearing any previous values in the vector')
        vector.clear()

        print('\nMODIFIERS')

        # Set + Push
        vector[0] = 1
        vector[1] = 2
        vector.push(3)
        _print_vector(vector, tr)

        # Swap
        vector.swap(0, 2)
        _print_vector(vector, tr)

        # Pop
        print('Popped: %s' % (vector.pop(),))
        _print_vector(vector, tr)

        # Clear
        vector.clear()

        print('Pop empty: %s' % (vector.pop(),))
        _print_vector(vector, tr)

        vector.push('Foo')
        print('Pop size 1: %s' % (vector.pop(),))
        _print_vector(vector, tr)

        print('\nCAPACITY OPERATIONS')

        # Capacity
        print('Size: %s' % (vector.size(),))
        print('Empty: %s' % (vector.empty(),))

        print('Resizing to length 5')
        vector.resize(5)
        show_with_size()

        print('Setting values')
        vector[0] = 'The'
        vector[1] = 'Quick'
        vector[2] = 'Brown'
        vector[3] = 'Fox'
        vector[4] = 'Jumps'
        vector[5] = 'Over'
        _print_vector(vector, tr)

        print('\nFRONT')
        print(vector.front())

        print('\nBACK')
        print(vector.back())

        print('\nELEMENT ACCESS')
        print('Index 0: %s' % (vector[0],))
        print('Index 5: %s' % (vector[5],))
        show_with_size()

        print('\nRESIZING')
        print('Resizing to 3')
        vector.resize(3)
        show_with_size()

        print('Resizing to 3 again')
        vector.resize(3)
        show_with_size()

        print('Resizing to 6')
        vector.resize(6)
        show_with_size()

        print('\nSPARSE TESTS')
        print('Popping sparse vector')
        vector.pop()
        show_with_size()

        print('Resizing to 4')
        vector.resize(4)
        show_with_size()

        print('Adding "word" to index 10, resize to 25')
        vector[10] = 'word'
        vector.resize(25)
        show_with_size()

        print('Popping sparse vector')
        vector.pop()
        show_with_size()

        print('Swapping with sparse element')
        vector.swap(10, 15)
        show_with_size()

        print('Swapping sparse elements')
        vector.swap(12, 13)
        show_with_size()


##############################
# Vector sample usage        #
##############################

import sys


# caution: modifies the database!
@fdb.transactional
def vector_example(tr):
    """Small demonstration of push, pop, set, resize and swap on 'my_vector'."""
    vector = Vector(Subspace(('my_vector',)), 0)

    with vector.use_transaction(tr):
        vector.clear()

        for i in range(0, 5):
            vector.push(i)

        _print_vector(vector, tr)

        print('Pop last item: %d' % vector.pop())
        _print_vector(vector, tr)

        vector[1] = 10
        vector.resize(11)
        _print_vector(vector, tr)

        vector.swap(1, 10)
        _print_vector(vector, tr)


def _print_vector(vector, tr):
    """Write the repr of every element, comma separated, followed by a newline."""
    with vector.use_transaction(tr):
        # Iterating the vector indexes it from 0 until get() raises IndexError
        sys.stdout.write(','.join(repr(v) for v in vector))

    sys.stdout.write('\n')


# caution: modifies the database!
if __name__ == '__main__':
    db = fdb.open()
    vector_example(db)
    # vector_test(db)