/* * FastAlloc.h * * This source file is part of the FoundationDB open source project * * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef FLOW_FASTALLOC_H #define FLOW_FASTALLOC_H #pragma once #include "flow/Error.h" #include "flow/Platform.h" // ALLOC_INSTRUMENTATION_STDOUT enables non-sampled logging of all allocations and deallocations to stdout to be processed by tools/alloc_instrumentation.py //#define ALLOC_INSTRUMENTATION_STDOUT ENABLED(NOT_IN_CLEAN) //#define ALLOC_INSTRUMENTATION ENABLED(NOT_IN_CLEAN) // The form "(1==1)" in this context is used to satisfy both clang and vc++ with a single syntax. Clang rejects "1" and vc++ rejects "true". #define FASTALLOC_THREAD_SAFE (FLOW_THREAD_SAFE || (1==1)) #if VALGRIND #include #include #endif #include "flow/Hash3.h" #include #include #include #include #include #include #if defined(ALLOC_INSTRUMENTATION) && defined(__linux__) #include #include #include #include #endif #ifdef ALLOC_INSTRUMENTATION #include #include #include "flow/ThreadPrimitives.h" struct AllocInstrInfo { int64_t allocCount; int64_t deallocCount; int64_t maxAllocated; inline void alloc(int64_t count=1) { allocCount += count; maxAllocated = std::max(allocCount-deallocCount,maxAllocated); } inline void dealloc(int64_t count=1) { deallocCount += count; } }; extern std::map allocInstr; #define INSTRUMENT_ALLOCATE(name) (allocInstr[(name)].alloc()) #define INSTRUMENT_RELEASE(name) (allocInstr[(name)].dealloc()) //extern std::map stackAllocations; // maps from an address to the hash of the backtrace and the size of the alloction extern std::unordered_map> memSample; struct BackTraceAccount { double count; size_t sampleCount; size_t totalSize; std::vector *backTrace; }; // maps from a hash of a backtrace to a backtrace and the total size of data currently allocated from this stack extern std::unordered_map backTraceLookup; extern ThreadSpinLock memLock; extern thread_local bool memSample_entered; extern const size_t SAMPLE_BYTES; #else #define INSTRUMENT_ALLOCATE(name) #define INSTRUMENT_RELEASE(name) #endif #if defined(ALLOC_INSTRUMENTATION) || defined(ALLOC_INSTRUMENTATION_STDOUT) void recordAllocation( void *ptr, size_t size ); void recordDeallocation( void *ptr ); #endif template class FastAllocator { public: [[nodiscard]] static void* allocate(); static void release(void* ptr); static void check( void* ptr, bool alloc ); static long long getTotalMemory(); static long long getApproximateMemoryUnused(); static long long getActiveThreads(); static void releaseThreadMagazines(); #ifdef ALLOC_INSTRUMENTATION static volatile int32_t pageCount; #endif private: #ifdef VALGRIND static unsigned long vLock; #endif static const int magazine_size = (128<<10) / Size; static const int PSize = Size / sizeof(void*); struct GlobalData; struct ThreadData { void* freelist; int count; // there are count items on freelist void* alternate; // alternate is either a full magazine, or an empty one }; static thread_local ThreadData threadData; static thread_local bool threadInitialized; static GlobalData* globalData() { #ifdef VALGRIND ANNOTATE_RWLOCK_ACQUIRED(vLock, 1); #endif static GlobalData *data = new GlobalData(); // This is thread-safe as of c++11 (VS 2015, gcc 4.8, clang 3.3) #ifdef VALGRIND ANNOTATE_RWLOCK_RELEASED(vLock, 1); #endif return data; } static void* freelist; FastAllocator(); // not implemented static void initThread(); static void getMagazine(); static void releaseMagazine(void*); }; extern std::atomic g_hugeArenaMemory; void hugeArenaSample(int size); void releaseAllThreadMagazines(); int64_t getTotalUnusedAllocatedMemory(); void setFastAllocatorThreadInitFunction( void (*)() ); // The given function will be called at least once in each thread that allocates from a FastAllocator. Currently just one such function is tracked. inline constexpr int nextFastAllocatedSize(int x) { assert(x > 0 && x <= 8192); if (x <= 16) return 16; else if (x <= 32) return 32; else if (x <= 64) return 64; else if (x <= 96) return 96; else if (x <= 128) return 128; else if (x <= 256) return 256; else if (x <= 512) return 512; else if (x <= 1024) return 1024; else if (x <= 2048) return 2048; else if (x <= 4096) return 4096; else return 8192; } template class FastAllocated { public: [[nodiscard]] static void* operator new(size_t s) { if (s != sizeof(Object)) abort(); INSTRUMENT_ALLOCATE(typeid(Object).name()); void* p = FastAllocator < sizeof(Object) <= 64 ? 64 : nextFastAllocatedSize(sizeof(Object)) > ::allocate(); return p; } static void operator delete(void* s) { INSTRUMENT_RELEASE(typeid(Object).name()); FastAllocator::release(s); } // Redefine placement new so you can still use it static void* operator new( size_t, void* p ) { return p; } static void operator delete( void*, void* ) { } }; [[nodiscard]] inline void* allocateFast(int size) { if (size <= 16) return FastAllocator<16>::allocate(); if (size <= 32) return FastAllocator<32>::allocate(); if (size <= 64) return FastAllocator<64>::allocate(); if (size <= 96) return FastAllocator<96>::allocate(); if (size <= 128) return FastAllocator<128>::allocate(); if (size <= 256) return FastAllocator<256>::allocate(); if (size <= 512) return FastAllocator<512>::allocate(); if (size <= 1024) return FastAllocator<1024>::allocate(); if (size <= 2048) return FastAllocator<2048>::allocate(); if (size <= 4096) return FastAllocator<4096>::allocate(); if (size <= 8192) return FastAllocator<8192>::allocate(); return new uint8_t[size]; } inline void freeFast(int size, void* ptr) { if (size <= 16) return FastAllocator<16>::release(ptr); if (size <= 32) return FastAllocator<32>::release(ptr); if (size <= 64) return FastAllocator<64>::release(ptr); if (size <= 96) return FastAllocator<96>::release(ptr); if (size <= 128) return FastAllocator<128>::release(ptr); if (size <= 256) return FastAllocator<256>::release(ptr); if (size <= 512) return FastAllocator<512>::release(ptr); if (size <= 1024) return FastAllocator<1024>::release(ptr); if (size <= 2048) return FastAllocator<2048>::release(ptr); if (size <= 4096) return FastAllocator<4096>::release(ptr); if (size <= 8192) return FastAllocator<8192>::release(ptr); delete[](uint8_t*)ptr; } #endif