mirror of
https://github.com/facebook/rocksdb.git
synced 2025-05-14 00:43:07 +08:00
Account memory of FileMetaData in global memory limit (#9924)
Summary: **Context/Summary:** As revealed by heap profiling, allocation of `FileMetaData` for [newly created file added to a Version](https://github.com/facebook/rocksdb/pull/9924/files#diff-a6aa385940793f95a2c5b39cc670bd440c4547fa54fd44622f756382d5e47e43R774) can consume significant heap memory. This PR is to account that toward our global memory limit based on block cache capacity. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9924 Test Plan: - Previous `make check` verified there are only 2 places where the memory of the allocated `FileMetaData` can be released - New unit test `TEST_P(ChargeFileMetadataTestWithParam, Basic)` - db bench (CPU cost of `charge_file_metadata` in write and compact) - **write micros/op: -0.24%** : `TEST_TMPDIR=/dev/shm/testdb ./db_bench -benchmarks=fillseq -db=$TEST_TMPDIR -charge_file_metadata=1 (remove this option for pre-PR) -disable_auto_compactions=1 -write_buffer_size=100000 -num=4000000 | egrep 'fillseq'` - **compact micros/op -0.87%** : `TEST_TMPDIR=/dev/shm/testdb ./db_bench -benchmarks=fillseq -db=$TEST_TMPDIR -charge_file_metadata=1 -disable_auto_compactions=1 -write_buffer_size=100000 -num=4000000 -numdistinct=1000 && ./db_bench -benchmarks=compact -db=$TEST_TMPDIR -use_existing_db=1 -charge_file_metadata=1 -disable_auto_compactions=1 | egrep 'compact'` table 1 - write #-run | (pre-PR) avg micros/op | std micros/op | (post-PR) micros/op | std micros/op | change (%) -- | -- | -- | -- | -- | -- 10 | 3.9711 | 0.264408 | 3.9914 | 0.254563 | 0.5111933721 20 | 3.83905 | 0.0664488 | 3.8251 | 0.0695456 | -0.3633711465 40 | 3.86625 | 0.136669 | 3.8867 | 0.143765 | 0.5289363078 80 | 3.87828 | 0.119007 | 3.86791 | 0.115674 | **-0.2673865734** 160 | 3.87677 | 0.162231 | 3.86739 | 0.16663 | **-0.2419539978** table 2 - compact #-run | (pre-PR) avg micros/op | std micros/op | (post-PR) micros/op | std micros/op | change (%) -- | -- | -- | -- | -- | -- 10 | 2,399,650.00 | 96,375.80 | 2,359,537.00 | 53,243.60 | -1.67 20 | 
2,410,480.00 | 89,988.00 | 2,433,580.00 | 91,121.20 | 0.96 40 | 2.41E+06 | 121811 | 2.39E+06 | 131525 | **-0.96** 80 | 2.40E+06 | 134503 | 2.39E+06 | 108799 | **-0.78** - stress test: `python3 tools/db_crashtest.py blackbox --charge_file_metadata=1 --cache_size=1` killed as normal Reviewed By: ajkr Differential Revision: D36055583 Pulled By: hx235 fbshipit-source-id: b60eab94707103cb1322cf815f05810ef0232625
This commit is contained in:
parent
40d19bc12c
commit
d665afdbf3
@ -51,6 +51,8 @@
|
||||
* RemoteCompaction supports table_properties_collector_factories override on compaction worker.
|
||||
* Start tracking SST unique id in MANIFEST, which will be used to verify with SST properties during DB open to make sure the SST file is not overwritten or misplaced. A db option `verify_sst_unique_id_in_manifest` is introduced to enable/disable the verification, if enabled all SST files will be opened during DB-open to verify the unique id (default is false), so it's recommended to use it with `max_open_files = -1` to pre-open the files.
|
||||
* Added the ability to concurrently read data blocks from multiple files in a level in batched MultiGet. This can be enabled by setting the async_io option in ReadOptions. Using this feature requires a FileSystem that supports ReadAsync (PosixFileSystem is not supported yet for this), and for RocksDB to be compiled with folly and c++20.
|
||||
* Charge memory usage of file metadata. RocksDB holds one file metadata structure in-memory per on-disk table file. If an operation reserving memory for file metadata exceeds the available space left in the block
|
||||
cache at some point (i.e., causing a cache full under `LRUCacheOptions::strict_capacity_limit` = true), creation will fail with `Status::MemoryLimit()`. To opt in to this feature, enable charging `CacheEntryRole::kFileMetadata` in `BlockBasedTableOptions::cache_usage_options`.
|
||||
|
||||
### Public API changes
|
||||
* Add rollback_deletion_type_callback to TransactionDBOptions so that write-prepared transactions know whether to issue a Delete or SingleDelete to cancel a previous key written during prior prepare phase. The PR aims to prevent mixing SingleDeletes and Deletes for the same key that can lead to undefined behaviors for write-prepared transactions.
|
||||
|
2
cache/cache_entry_roles.cc
vendored
2
cache/cache_entry_roles.cc
vendored
@ -22,6 +22,7 @@ std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToCamelString{{
|
||||
"CompressionDictionaryBuildingBuffer",
|
||||
"FilterConstruction",
|
||||
"BlockBasedTableReader",
|
||||
"FileMetadata",
|
||||
"Misc",
|
||||
}};
|
||||
|
||||
@ -36,6 +37,7 @@ std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToHyphenString{{
|
||||
"compression-dictionary-building-buffer",
|
||||
"filter-construction",
|
||||
"block-based-table-reader",
|
||||
"file-metadata",
|
||||
"misc",
|
||||
}};
|
||||
|
||||
|
1
cache/cache_reservation_manager.cc
vendored
1
cache/cache_reservation_manager.cc
vendored
@ -180,4 +180,5 @@ template class CacheReservationManagerImpl<
|
||||
template class CacheReservationManagerImpl<CacheEntryRole::kFilterConstruction>;
|
||||
template class CacheReservationManagerImpl<CacheEntryRole::kMisc>;
|
||||
template class CacheReservationManagerImpl<CacheEntryRole::kWriteBuffer>;
|
||||
template class CacheReservationManagerImpl<CacheEntryRole::kFileMetadata>;
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
28
cache/cache_reservation_manager.h
vendored
28
cache/cache_reservation_manager.h
vendored
@ -36,6 +36,12 @@ class CacheReservationManager {
|
||||
};
|
||||
virtual ~CacheReservationManager() {}
|
||||
virtual Status UpdateCacheReservation(std::size_t new_memory_used) = 0;
|
||||
// TODO(hx235): replace the usage of
|
||||
// `UpdateCacheReservation(memory_used_delta, increase)` with
|
||||
// `UpdateCacheReservation(new_memory_used)` so that we only have one
|
||||
// `UpdateCacheReservation` function
|
||||
virtual Status UpdateCacheReservation(std::size_t memory_used_delta,
|
||||
bool increase) = 0;
|
||||
virtual Status MakeCacheReservation(
|
||||
std::size_t incremental_memory_used,
|
||||
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
|
||||
@ -128,6 +134,11 @@ class CacheReservationManagerImpl
|
||||
// On keeping dummy entries the same, it always returns Status::OK().
|
||||
Status UpdateCacheReservation(std::size_t new_memory_used) override;
|
||||
|
||||
Status UpdateCacheReservation(std::size_t /* memory_used_delta */,
|
||||
bool /* increase */) override {
|
||||
return Status::NotSupported();
|
||||
}
|
||||
|
||||
// One of the two ways of reserving cache space and releasing is done through
|
||||
// destruction of CacheReservationHandle.
|
||||
// See UpdateCacheReservation() for the other way.
|
||||
@ -254,6 +265,23 @@ class ConcurrentCacheReservationManager
|
||||
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
|
||||
return cache_res_mgr_->UpdateCacheReservation(new_memory_used);
|
||||
}
|
||||
|
||||
// Applies a relative change to the reservation. While holding
// `cache_res_mgr_mu_`, reads the wrapped manager's current total, adds or
// subtracts `memory_used_delta` depending on `increase`, and forwards the
// resulting absolute total to `UpdateCacheReservation(new_memory_used)`.
// A decrease larger than the current total is asserted against.
inline Status UpdateCacheReservation(std::size_t memory_used_delta,
                                     bool increase) override {
  std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
  const std::size_t current_total = cache_res_mgr_->GetTotalMemoryUsed();
  if (increase) {
    return cache_res_mgr_->UpdateCacheReservation(current_total +
                                                  memory_used_delta);
  }
  assert(current_total >= memory_used_delta);
  return cache_res_mgr_->UpdateCacheReservation(current_total -
                                                memory_used_delta);
}
|
||||
|
||||
inline Status MakeCacheReservation(
|
||||
std::size_t incremental_memory_used,
|
||||
std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
|
||||
|
@ -619,6 +619,26 @@ ColumnFamilyData::ColumnFamilyData(
|
||||
}
|
||||
|
||||
RecalculateWriteStallConditions(mutable_cf_options_);
|
||||
|
||||
if (cf_options.table_factory->IsInstanceOf(
|
||||
TableFactory::kBlockBasedTableName()) &&
|
||||
cf_options.table_factory->GetOptions<BlockBasedTableOptions>()) {
|
||||
const BlockBasedTableOptions* bbto =
|
||||
cf_options.table_factory->GetOptions<BlockBasedTableOptions>();
|
||||
const auto& options_overrides = bbto->cache_usage_options.options_overrides;
|
||||
const auto file_metadata_charged =
|
||||
options_overrides.at(CacheEntryRole::kFileMetadata).charged;
|
||||
if (bbto->block_cache &&
|
||||
file_metadata_charged == CacheEntryRoleOptions::Decision::kEnabled) {
|
||||
// TODO(hx235): Add a `ConcurrentCacheReservationManager` at DB scope
|
||||
// responsible for reservation of `ObsoleteFileInfo` so that we can keep
|
||||
// this `file_metadata_cache_res_mgr_` nonconcurrent
|
||||
file_metadata_cache_res_mgr_.reset(new ConcurrentCacheReservationManager(
|
||||
std::make_shared<
|
||||
CacheReservationManagerImpl<CacheEntryRole::kFileMetadata>>(
|
||||
bbto->block_cache)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DB mutex held
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "cache/cache_reservation_manager.h"
|
||||
#include "db/memtable_list.h"
|
||||
#include "db/table_cache.h"
|
||||
#include "db/table_properties_collector.h"
|
||||
@ -520,6 +521,10 @@ class ColumnFamilyData {
|
||||
|
||||
ThreadLocalPtr* TEST_GetLocalSV() { return local_sv_.get(); }
|
||||
WriteBufferManager* write_buffer_mgr() { return write_buffer_manager_; }
|
||||
std::shared_ptr<CacheReservationManager>
|
||||
GetFileMetadataCacheReservationManager() {
|
||||
return file_metadata_cache_res_mgr_;
|
||||
}
|
||||
|
||||
static const uint32_t kDummyColumnFamilyDataId;
|
||||
|
||||
@ -618,6 +623,10 @@ class ColumnFamilyData {
|
||||
bool db_paths_registered_;
|
||||
|
||||
std::string full_history_ts_low_;
|
||||
|
||||
// For charging memory usage of file metadata created for newly added files to
|
||||
// a Version associated with this CFD
|
||||
std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr_;
|
||||
};
|
||||
|
||||
// ColumnFamilySet has interesting thread-safety requirements
|
||||
|
@ -1744,5 +1744,6 @@ template class TargetCacheChargeTrackingCache<
|
||||
CacheEntryRole::kFilterConstruction>;
|
||||
template class TargetCacheChargeTrackingCache<
|
||||
CacheEntryRole::kBlockBasedTableReader>;
|
||||
template class TargetCacheChargeTrackingCache<CacheEntryRole::kFileMetadata>;
|
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
@ -320,7 +320,7 @@ class InternalKey {
|
||||
}
|
||||
|
||||
Slice user_key() const { return ExtractUserKey(rep_); }
|
||||
size_t size() { return rep_.size(); }
|
||||
size_t size() const { return rep_.size(); }
|
||||
|
||||
void Set(const Slice& _user_key, SequenceNumber s, ValueType t) {
|
||||
SetFrom(ParsedInternalKey(_user_key, s, t));
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "cache/cache_reservation_manager.h"
|
||||
#include "db/blob/blob_file_meta.h"
|
||||
#include "db/dbformat.h"
|
||||
#include "db/internal_stats.h"
|
||||
@ -255,10 +256,13 @@ class VersionBuilder::Rep {
|
||||
// version edits.
|
||||
std::map<uint64_t, MutableBlobFileMetaData> mutable_blob_file_metas_;
|
||||
|
||||
std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr_;
|
||||
|
||||
public:
|
||||
Rep(const FileOptions& file_options, const ImmutableCFOptions* ioptions,
|
||||
TableCache* table_cache, VersionStorageInfo* base_vstorage,
|
||||
VersionSet* version_set)
|
||||
VersionSet* version_set,
|
||||
std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr)
|
||||
: file_options_(file_options),
|
||||
ioptions_(ioptions),
|
||||
table_cache_(table_cache),
|
||||
@ -266,7 +270,8 @@ class VersionBuilder::Rep {
|
||||
version_set_(version_set),
|
||||
num_levels_(base_vstorage->num_levels()),
|
||||
has_invalid_levels_(false),
|
||||
level_nonzero_cmp_(base_vstorage_->InternalComparator()) {
|
||||
level_nonzero_cmp_(base_vstorage_->InternalComparator()),
|
||||
file_metadata_cache_res_mgr_(file_metadata_cache_res_mgr) {
|
||||
assert(ioptions_);
|
||||
|
||||
levels_ = new LevelState[num_levels_];
|
||||
@ -291,6 +296,12 @@ class VersionBuilder::Rep {
|
||||
table_cache_->ReleaseHandle(f->table_reader_handle);
|
||||
f->table_reader_handle = nullptr;
|
||||
}
|
||||
|
||||
if (file_metadata_cache_res_mgr_) {
|
||||
Status s = file_metadata_cache_res_mgr_->UpdateCacheReservation(
|
||||
f->ApproximateMemoryUsage(), false /* increase */);
|
||||
s.PermitUncheckedError();
|
||||
}
|
||||
delete f;
|
||||
}
|
||||
}
|
||||
@ -763,6 +774,22 @@ class VersionBuilder::Rep {
|
||||
FileMetaData* const f = new FileMetaData(meta);
|
||||
f->refs = 1;
|
||||
|
||||
if (file_metadata_cache_res_mgr_) {
|
||||
Status s = file_metadata_cache_res_mgr_->UpdateCacheReservation(
|
||||
f->ApproximateMemoryUsage(), true /* increase */);
|
||||
if (!s.ok()) {
|
||||
delete f;
|
||||
s = Status::MemoryLimit(
|
||||
"Can't allocate " +
|
||||
kCacheEntryRoleToCamelString[static_cast<std::uint32_t>(
|
||||
CacheEntryRole::kFileMetadata)] +
|
||||
" due to exceeding the memory limit "
|
||||
"based on "
|
||||
"cache capacity");
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
auto& add_files = level_state.added_files;
|
||||
assert(add_files.find(file_number) == add_files.end());
|
||||
add_files.emplace(file_number, f);
|
||||
@ -1239,13 +1266,13 @@ class VersionBuilder::Rep {
|
||||
}
|
||||
};
|
||||
|
||||
VersionBuilder::VersionBuilder(const FileOptions& file_options,
|
||||
const ImmutableCFOptions* ioptions,
|
||||
TableCache* table_cache,
|
||||
VersionStorageInfo* base_vstorage,
|
||||
VersionSet* version_set)
|
||||
VersionBuilder::VersionBuilder(
|
||||
const FileOptions& file_options, const ImmutableCFOptions* ioptions,
|
||||
TableCache* table_cache, VersionStorageInfo* base_vstorage,
|
||||
VersionSet* version_set,
|
||||
std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr)
|
||||
: rep_(new Rep(file_options, ioptions, table_cache, base_vstorage,
|
||||
version_set)) {}
|
||||
version_set, file_metadata_cache_res_mgr)) {}
|
||||
|
||||
VersionBuilder::~VersionBuilder() = default;
|
||||
|
||||
@ -1280,7 +1307,8 @@ BaseReferencedVersionBuilder::BaseReferencedVersionBuilder(
|
||||
: version_builder_(new VersionBuilder(
|
||||
cfd->current()->version_set()->file_options(), cfd->ioptions(),
|
||||
cfd->table_cache(), cfd->current()->storage_info(),
|
||||
cfd->current()->version_set())),
|
||||
cfd->current()->version_set(),
|
||||
cfd->GetFileMetadataCacheReservationManager())),
|
||||
version_(cfd->current()) {
|
||||
version_->Ref();
|
||||
}
|
||||
@ -1289,7 +1317,8 @@ BaseReferencedVersionBuilder::BaseReferencedVersionBuilder(
|
||||
ColumnFamilyData* cfd, Version* v)
|
||||
: version_builder_(new VersionBuilder(
|
||||
cfd->current()->version_set()->file_options(), cfd->ioptions(),
|
||||
cfd->table_cache(), v->storage_info(), v->version_set())),
|
||||
cfd->table_cache(), v->storage_info(), v->version_set(),
|
||||
cfd->GetFileMetadataCacheReservationManager())),
|
||||
version_(v) {
|
||||
assert(version_ != cfd->current());
|
||||
}
|
||||
|
@ -25,6 +25,7 @@ class InternalStats;
|
||||
class Version;
|
||||
class VersionSet;
|
||||
class ColumnFamilyData;
|
||||
class CacheReservationManager;
|
||||
|
||||
// A helper class so we can efficiently apply a whole sequence
|
||||
// of edits to a particular state without creating intermediate
|
||||
@ -33,7 +34,9 @@ class VersionBuilder {
|
||||
public:
|
||||
VersionBuilder(const FileOptions& file_options,
|
||||
const ImmutableCFOptions* ioptions, TableCache* table_cache,
|
||||
VersionStorageInfo* base_vstorage, VersionSet* version_set);
|
||||
VersionStorageInfo* base_vstorage, VersionSet* version_set,
|
||||
std::shared_ptr<CacheReservationManager>
|
||||
file_metadata_cache_res_mgr = nullptr);
|
||||
~VersionBuilder();
|
||||
|
||||
bool CheckConsistencyForNumLevels();
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "db/dbformat.h"
|
||||
#include "db/wal_edit.h"
|
||||
#include "memory/arena.h"
|
||||
#include "port/malloc.h"
|
||||
#include "rocksdb/advanced_options.h"
|
||||
#include "rocksdb/cache.h"
|
||||
#include "table/table_reader.h"
|
||||
@ -293,6 +294,25 @@ struct FileMetaData {
|
||||
}
|
||||
return kUnknownFileCreationTime;
|
||||
}
|
||||
|
||||
// WARNING: manual update to this function is needed
|
||||
// whenever a new string property is added to FileMetaData
|
||||
// to reduce approximation error.
|
||||
//
|
||||
// TODO: eliminate the need of manually updating this function
|
||||
// for new string properties
|
||||
size_t ApproximateMemoryUsage() const {
|
||||
size_t usage = 0;
|
||||
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
|
||||
usage += malloc_usable_size(const_cast<FileMetaData*>(this));
|
||||
#else
|
||||
usage += sizeof(*this);
|
||||
#endif // ROCKSDB_MALLOC_USABLE_SIZE
|
||||
usage += smallest.size() + largest.size() + file_checksum.size() +
|
||||
file_checksum_func_name.size() + min_timestamp.size() +
|
||||
max_timestamp.size();
|
||||
return usage;
|
||||
}
|
||||
};
|
||||
|
||||
// A compressed copy of file meta data that just contain minimum data needed
|
||||
|
@ -775,7 +775,8 @@ Version::~Version() {
|
||||
uint32_t path_id = f->fd.GetPathId();
|
||||
assert(path_id < cfd_->ioptions()->cf_paths.size());
|
||||
vset_->obsolete_files_.push_back(
|
||||
ObsoleteFileInfo(f, cfd_->ioptions()->cf_paths[path_id].path));
|
||||
ObsoleteFileInfo(f, cfd_->ioptions()->cf_paths[path_id].path,
|
||||
cfd_->GetFileMetadataCacheReservationManager()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -699,8 +699,13 @@ struct ObsoleteFileInfo {
|
||||
|
||||
ObsoleteFileInfo() noexcept
|
||||
: metadata(nullptr), only_delete_metadata(false) {}
|
||||
ObsoleteFileInfo(FileMetaData* f, const std::string& file_path)
|
||||
: metadata(f), path(file_path), only_delete_metadata(false) {}
|
||||
ObsoleteFileInfo(FileMetaData* f, const std::string& file_path,
|
||||
std::shared_ptr<CacheReservationManager>
|
||||
file_metadata_cache_res_mgr_arg = nullptr)
|
||||
: metadata(f),
|
||||
path(file_path),
|
||||
only_delete_metadata(false),
|
||||
file_metadata_cache_res_mgr(file_metadata_cache_res_mgr_arg) {}
|
||||
|
||||
ObsoleteFileInfo(const ObsoleteFileInfo&) = delete;
|
||||
ObsoleteFileInfo& operator=(const ObsoleteFileInfo&) = delete;
|
||||
@ -713,13 +718,23 @@ struct ObsoleteFileInfo {
|
||||
path = std::move(rhs.path);
|
||||
metadata = rhs.metadata;
|
||||
rhs.metadata = nullptr;
|
||||
file_metadata_cache_res_mgr = rhs.file_metadata_cache_res_mgr;
|
||||
rhs.file_metadata_cache_res_mgr = nullptr;
|
||||
|
||||
return *this;
|
||||
}
|
||||
void DeleteMetadata() {
|
||||
if (file_metadata_cache_res_mgr) {
|
||||
Status s = file_metadata_cache_res_mgr->UpdateCacheReservation(
|
||||
metadata->ApproximateMemoryUsage(), false /* increase */);
|
||||
s.PermitUncheckedError();
|
||||
}
|
||||
delete metadata;
|
||||
metadata = nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr;
|
||||
};
|
||||
|
||||
class ObsoleteBlobFileInfo {
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <algorithm>
|
||||
|
||||
#include "db/db_impl/db_impl.h"
|
||||
#include "db/db_test_util.h"
|
||||
#include "db/log_writer.h"
|
||||
#include "rocksdb/advanced_options.h"
|
||||
#include "rocksdb/convenience.h"
|
||||
@ -3446,6 +3447,124 @@ TEST_F(VersionSetTestMissingFiles, MinLogNumberToKeep2PC) {
|
||||
}
|
||||
}
|
||||
|
||||
class ChargeFileMetadataTest : public DBTestBase {
|
||||
public:
|
||||
ChargeFileMetadataTest()
|
||||
: DBTestBase("charge_file_metadata_test", /*env_do_fsync=*/true) {}
|
||||
};
|
||||
|
||||
class ChargeFileMetadataTestWithParam
|
||||
: public ChargeFileMetadataTest,
|
||||
public testing::WithParamInterface<CacheEntryRoleOptions::Decision> {
|
||||
public:
|
||||
ChargeFileMetadataTestWithParam() {}
|
||||
};
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
ChargeFileMetadataTestWithParam, ChargeFileMetadataTestWithParam,
|
||||
::testing::Values(CacheEntryRoleOptions::Decision::kEnabled,
|
||||
CacheEntryRoleOptions::Decision::kDisabled));
|
||||
|
||||
TEST_P(ChargeFileMetadataTestWithParam, Basic) {
|
||||
Options options;
|
||||
BlockBasedTableOptions table_options;
|
||||
CacheEntryRoleOptions::Decision charge_file_metadata = GetParam();
|
||||
table_options.cache_usage_options.options_overrides.insert(
|
||||
{CacheEntryRole::kFileMetadata, {/*.charged = */ charge_file_metadata}});
|
||||
std::shared_ptr<TargetCacheChargeTrackingCache<CacheEntryRole::kFileMetadata>>
|
||||
file_metadata_charge_only_cache = std::make_shared<
|
||||
TargetCacheChargeTrackingCache<CacheEntryRole::kFileMetadata>>(
|
||||
NewLRUCache(
|
||||
4 * CacheReservationManagerImpl<
|
||||
CacheEntryRole::kFileMetadata>::GetDummyEntrySize(),
|
||||
0 /* num_shard_bits */, true /* strict_capacity_limit */));
|
||||
table_options.block_cache = file_metadata_charge_only_cache;
|
||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||
options.create_if_missing = true;
|
||||
options.disable_auto_compactions = true;
|
||||
DestroyAndReopen(options);
|
||||
|
||||
// Create 128 file metadata, each of which is roughly 1024 bytes.
|
||||
// This results in 1 *
|
||||
// CacheReservationManagerImpl<CacheEntryRole::kFileMetadata>::GetDummyEntrySize()
|
||||
// cache reservation for file metadata.
|
||||
for (int i = 1; i <= 128; ++i) {
|
||||
ASSERT_OK(Put(std::string(1024, 'a'), "va"));
|
||||
ASSERT_OK(Put("b", "vb"));
|
||||
ASSERT_OK(Flush());
|
||||
}
|
||||
if (charge_file_metadata == CacheEntryRoleOptions::Decision::kEnabled) {
|
||||
EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(),
|
||||
1 * CacheReservationManagerImpl<
|
||||
CacheEntryRole::kFileMetadata>::GetDummyEntrySize());
|
||||
|
||||
} else {
|
||||
EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), 0);
|
||||
}
|
||||
|
||||
// Create another 128 file metadata.
|
||||
// This increases the file metadata cache reservation to 2 *
|
||||
// CacheReservationManagerImpl<CacheEntryRole::kFileMetadata>::GetDummyEntrySize().
|
||||
for (int i = 1; i <= 128; ++i) {
|
||||
ASSERT_OK(Put(std::string(1024, 'a'), "vva"));
|
||||
ASSERT_OK(Put("b", "vvb"));
|
||||
ASSERT_OK(Flush());
|
||||
}
|
||||
if (charge_file_metadata == CacheEntryRoleOptions::Decision::kEnabled) {
|
||||
EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(),
|
||||
2 * CacheReservationManagerImpl<
|
||||
CacheEntryRole::kFileMetadata>::GetDummyEntrySize());
|
||||
} else {
|
||||
EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), 0);
|
||||
}
|
||||
// Compaction will create 1 new file metadata, obsolete and delete all 256
|
||||
// file metadata above. This results in 1 *
|
||||
// CacheReservationManagerImpl<CacheEntryRole::kFileMetadata>::GetDummyEntrySize()
|
||||
// cache reservation for file metadata.
|
||||
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
||||
ASSERT_EQ("0,1", FilesPerLevel(0));
|
||||
|
||||
if (charge_file_metadata == CacheEntryRoleOptions::Decision::kEnabled) {
|
||||
EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(),
|
||||
1 * CacheReservationManagerImpl<
|
||||
CacheEntryRole::kFileMetadata>::GetDummyEntrySize());
|
||||
} else {
|
||||
EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(), 0);
|
||||
}
|
||||
|
||||
// Destroying the db will delete the remaining 1 new file metadata
|
||||
// This results in no cache reservation for file metadata.
|
||||
Destroy(options);
|
||||
EXPECT_EQ(file_metadata_charge_only_cache->GetCacheCharge(),
|
||||
0 * CacheReservationManagerImpl<
|
||||
CacheEntryRole::kFileMetadata>::GetDummyEntrySize());
|
||||
|
||||
// Reopen the db with a smaller cache in order to test failure in allocating
|
||||
// file metadata due to memory limit based on cache capacity
|
||||
file_metadata_charge_only_cache = std::make_shared<
|
||||
TargetCacheChargeTrackingCache<CacheEntryRole::kFileMetadata>>(
|
||||
NewLRUCache(1 * CacheReservationManagerImpl<
|
||||
CacheEntryRole::kFileMetadata>::GetDummyEntrySize(),
|
||||
0 /* num_shard_bits */, true /* strict_capacity_limit */));
|
||||
table_options.block_cache = file_metadata_charge_only_cache;
|
||||
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
||||
Reopen(options);
|
||||
ASSERT_OK(Put(std::string(1024, 'a'), "va"));
|
||||
ASSERT_OK(Put("b", "vb"));
|
||||
Status s = Flush();
|
||||
if (charge_file_metadata == CacheEntryRoleOptions::Decision::kEnabled) {
|
||||
EXPECT_TRUE(s.IsMemoryLimit());
|
||||
EXPECT_TRUE(s.ToString().find(
|
||||
kCacheEntryRoleToCamelString[static_cast<std::uint32_t>(
|
||||
CacheEntryRole::kFileMetadata)]) != std::string::npos);
|
||||
EXPECT_TRUE(s.ToString().find("memory limit based on cache capacity") !=
|
||||
std::string::npos);
|
||||
} else {
|
||||
EXPECT_TRUE(s.ok());
|
||||
}
|
||||
}
|
||||
#endif // ROCKSDB_LITE
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
@ -139,6 +139,7 @@ DECLARE_bool(cache_index_and_filter_blocks);
|
||||
DECLARE_bool(charge_compression_dictionary_building_buffer);
|
||||
DECLARE_bool(charge_filter_construction);
|
||||
DECLARE_bool(charge_table_reader);
|
||||
DECLARE_bool(charge_file_metadata);
|
||||
DECLARE_int32(top_level_index_pinning);
|
||||
DECLARE_int32(partition_pinning);
|
||||
DECLARE_int32(unpartitioned_pinning);
|
||||
|
@ -325,6 +325,11 @@ DEFINE_bool(charge_table_reader, false,
|
||||
"CacheEntryRoleOptions::charged of"
|
||||
"CacheEntryRole::kBlockBasedTableReader");
|
||||
|
||||
// Flag selecting whether CacheEntryRole::kFileMetadata is charged.
// The help text is built from adjacent string literals, so each piece must
// carry its own trailing space.
DEFINE_bool(charge_file_metadata, false,
            "Setting for "
            "CacheEntryRoleOptions::charged of "
            "CacheEntryRole::kFileMetadata");
|
||||
|
||||
DEFINE_int32(
|
||||
top_level_index_pinning,
|
||||
static_cast<int32_t>(ROCKSDB_NAMESPACE::PinningTier::kFallback),
|
||||
|
@ -2766,6 +2766,11 @@ void InitializeOptionsFromFlags(
|
||||
{/*.charged = */ FLAGS_charge_table_reader
|
||||
? CacheEntryRoleOptions::Decision::kEnabled
|
||||
: CacheEntryRoleOptions::Decision::kDisabled}});
|
||||
block_based_options.cache_usage_options.options_overrides.insert(
|
||||
{CacheEntryRole::kFileMetadata,
|
||||
{/*.charged = */ FLAGS_charge_file_metadata
|
||||
? CacheEntryRoleOptions::Decision::kEnabled
|
||||
: CacheEntryRoleOptions::Decision::kDisabled}});
|
||||
block_based_options.format_version =
|
||||
static_cast<uint32_t>(FLAGS_format_version);
|
||||
block_based_options.index_block_restart_interval =
|
||||
|
@ -9,8 +9,8 @@
|
||||
|
||||
#ifdef GFLAGS
|
||||
#include "db_stress_tool/db_stress_common.h"
|
||||
#include "utilities/fault_injection_fs.h"
|
||||
#include "rocksdb/utilities/transaction_db.h"
|
||||
#include "utilities/fault_injection_fs.h"
|
||||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
class NonBatchedOpsStressTest : public StressTest {
|
||||
|
@ -570,6 +570,9 @@ enum class CacheEntryRole {
|
||||
// BlockBasedTableReader's charge to account for
|
||||
// its memory usage
|
||||
kBlockBasedTableReader,
|
||||
// FileMetadata's charge to account for
|
||||
// its memory usage
|
||||
kFileMetadata,
|
||||
// Default bucket, for miscellaneous cache entries. Do not use for
|
||||
// entries that could potentially add up to large usage.
|
||||
kMisc,
|
||||
|
@ -370,7 +370,20 @@ struct BlockBasedTableOptions {
|
||||
// (iii) Compatible existing behavior:
|
||||
// Same as kDisabled.
|
||||
//
|
||||
// (d) Other CacheEntryRole
|
||||
// (d) CacheEntryRole::kFileMetadata
|
||||
// (i) If kEnabled:
|
||||
// Charge memory usage of file metadata. RocksDB holds one file metadata
|
||||
// structure in-memory per on-disk table file.
|
||||
// If such file metadata's
|
||||
// memory exceeds the available space left in the block cache at some point
|
||||
// (i.e., causing a cache full under `LRUCacheOptions::strict_capacity_limit` =
|
||||
// true), creation will fail with Status::MemoryLimit().
|
||||
// (ii) If kDisabled:
|
||||
// Does not charge the memory usage mentioned above.
|
||||
// (iii) Compatible existing behavior:
|
||||
// Same as kDisabled.
|
||||
//
|
||||
// (e) Other CacheEntryRole
|
||||
// Not supported.
|
||||
// `Status::kNotSupported` will be returned if
|
||||
// `CacheEntryRoleOptions::charged` is set to {`kEnabled`, `kDisabled`}.
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
package org.rocksdb;
|
||||
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
@ -14,8 +16,6 @@ import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import org.rocksdb.util.Environment;
|
||||
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
|
||||
/**
|
||||
* A RocksDB is a persistent ordered map from keys to values. It is safe for
|
||||
* concurrent access from multiple threads without any external synchronization.
|
||||
|
@ -695,7 +695,7 @@ Status BlockBasedTableFactory::ValidateOptions(
|
||||
static const std::set<CacheEntryRole> kMemoryChargingSupported = {
|
||||
CacheEntryRole::kCompressionDictionaryBuildingBuffer,
|
||||
CacheEntryRole::kFilterConstruction,
|
||||
CacheEntryRole::kBlockBasedTableReader};
|
||||
CacheEntryRole::kBlockBasedTableReader, CacheEntryRole::kFileMetadata};
|
||||
if (options.charged != CacheEntryRoleOptions::Decision::kFallback &&
|
||||
kMemoryChargingSupported.count(role) == 0) {
|
||||
return Status::NotSupported(
|
||||
|
@ -1157,6 +1157,11 @@ DEFINE_bool(charge_table_reader, false,
|
||||
"CacheEntryRoleOptions::charged of"
|
||||
"CacheEntryRole::kBlockBasedTableReader");
|
||||
|
||||
// Flag selecting whether CacheEntryRole::kFileMetadata is charged.
// The help text is built from adjacent string literals, so each piece must
// carry its own trailing space.
DEFINE_bool(charge_file_metadata, false,
            "Setting for "
            "CacheEntryRoleOptions::charged of "
            "CacheEntryRole::kFileMetadata");
|
||||
|
||||
DEFINE_uint64(backup_rate_limit, 0ull,
|
||||
"If non-zero, db_bench will rate limit reads and writes for DB "
|
||||
"backup. This "
|
||||
@ -4242,6 +4247,11 @@ class Benchmark {
|
||||
{/*.charged = */ FLAGS_charge_table_reader
|
||||
? CacheEntryRoleOptions::Decision::kEnabled
|
||||
: CacheEntryRoleOptions::Decision::kDisabled}});
|
||||
block_based_options.cache_usage_options.options_overrides.insert(
|
||||
{CacheEntryRole::kFileMetadata,
|
||||
{/*.charged = */ FLAGS_charge_file_metadata
|
||||
? CacheEntryRoleOptions::Decision::kEnabled
|
||||
: CacheEntryRoleOptions::Decision::kDisabled}});
|
||||
block_based_options.block_cache_compressed = compressed_cache_;
|
||||
block_based_options.block_size = FLAGS_block_size;
|
||||
block_based_options.block_restart_interval = FLAGS_block_restart_interval;
|
||||
|
@ -44,6 +44,7 @@ default_params = {
|
||||
"charge_compression_dictionary_building_buffer": lambda: random.choice([0, 1]),
|
||||
"charge_filter_construction": lambda: random.choice([0, 1]),
|
||||
"charge_table_reader": lambda: random.choice([0, 1]),
|
||||
"charge_file_metadata": lambda: random.choice([0, 1]),
|
||||
"checkpoint_one_in": 1000000,
|
||||
"compression_type": lambda: random.choice(
|
||||
["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]),
|
||||
|
Loading…
x
Reference in New Issue
Block a user