mirror of
https://github.com/facebook/rocksdb.git
synced 2025-04-20 13:52:03 +08:00
Inject spurious wakeup and sleep before acquiring db mutex to expose race condition (#10291)
Summary: **Context/Summary:** Previous experience with bugs and flaky tests taught us there exist features in RocksDB vulnerable to race condition caused by acquiring db mutex at a particular timing. This PR aggressively exposes those vulnerable features by injecting spurious wakeup and sleep to cause acquiring db mutex at various timing in order to expose such race condition **Testing:** - `COERCE_CONTEXT_SWITCH=1 make -j56 check / make -j56 db_stress` should reveal - flaky tests caused by db mutex related race condition - Reverted https://github.com/facebook/rocksdb/pull/9528 - A/B testing on `COMPILE_WITH_TSAN=1 make -j56 listener_test` w/ and w/o `COERCE_CONTEXT_SWITCH=1` followed by `./listener_test --gtest_filter=EventListenerTest.MultiCF --gtest_repeat=10` - `COERCE_CONTEXT_SWITCH=1` can cause expected test failure (i.e, expose target TSAN data race error) within 10 run while the other couldn't. - This proves our injection can expose flaky tests caused by db mutex related race condition faster. - known or new race-condition-type of internal bug by continuously running this PR - Performance - High ops-threads time: COERCE_CONTEXT_SWITCH=1 regressed by 4 times slower (2:01.16 vs 0:22.10 elapsed ). This PR will be run as a separate CI job so this regression won't affect any existing job. ``` TEST_TMPDIR=$db /usr/bin/time ./db_stress \ --ops_per_thread=100000 --expected_values_dir=$exp --clear_column_family_one_in=0 \ --write_buffer_size=524288 —target_file_size_base=524288 —ingest_external_file_one_in=100 —compact_files_one_in=1000 —compact_range_one_in=1000 ``` - Start-up time: COERCE_CONTEXT_SWITCH=1 didn't regress by 25% (0:01.51 vs 0:01.29 elapsed) ``` TEST_TMPDIR=$db ./db_stress -ops_per_thread=100000000 -expected_values_dir=$exp --clear_column_family_one_in=0 & sleep 120; pkill -9 db_stress TEST_TMPDIR=$db /usr/bin/time ./db_stress \ --ops_per_thread=1 -reopen=0 --expected_values_dir=$exp --clear_column_family_one_in=0 --destroy_db_initially=0 ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/10291 Reviewed By: ajkr Differential Revision: D39231182 Pulled By: hx235 fbshipit-source-id: 7ab6695430460e0826727fd8c66679b32b3e44b6
This commit is contained in:
parent
be09943fb5
commit
f79b3d19a7
10
Makefile
10
Makefile
@ -190,6 +190,16 @@ ifeq ($(USE_LTO), 1)
|
||||
LDFLAGS += -flto -fuse-linker-plugin
|
||||
endif
|
||||
|
||||
# `COERCE_CONTEXT_SWITCH=1` will inject spurious wakeup and
|
||||
# random length of sleep or context switch at critical
|
||||
# points (e.g, before acquring db mutex) in RocksDB.
|
||||
# In this way, it coerces as many excution orders as possible in the hope of
|
||||
# exposing the problematic excution order
|
||||
COERCE_CONTEXT_SWITCH ?= 0
|
||||
ifeq ($(COERCE_CONTEXT_SWITCH), 1)
|
||||
OPT += -DCOERCE_CONTEXT_SWITCH
|
||||
endif
|
||||
|
||||
#-----------------------------------------------
|
||||
include src.mk
|
||||
|
||||
|
@ -165,8 +165,13 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
|
||||
fs_(immutable_db_options_.fs, io_tracer_),
|
||||
mutable_db_options_(initial_db_options_),
|
||||
stats_(immutable_db_options_.stats),
|
||||
#ifdef COERCE_CONTEXT_SWITCH
|
||||
mutex_(stats_, immutable_db_options_.clock, DB_MUTEX_WAIT_MICROS, &bg_cv_,
|
||||
immutable_db_options_.use_adaptive_mutex),
|
||||
#else // COERCE_CONTEXT_SWITCH
|
||||
mutex_(stats_, immutable_db_options_.clock, DB_MUTEX_WAIT_MICROS,
|
||||
immutable_db_options_.use_adaptive_mutex),
|
||||
#endif // COERCE_CONTEXT_SWITCH
|
||||
default_cf_handle_(nullptr),
|
||||
error_handler_(this, immutable_db_options_, &mutex_),
|
||||
event_logger_(immutable_db_options_.info_log.get()),
|
||||
|
@ -34,6 +34,23 @@ void InstrumentedMutex::Lock() {
|
||||
void InstrumentedMutex::LockInternal() {
|
||||
#ifndef NDEBUG
|
||||
ThreadStatusUtil::TEST_StateDelay(ThreadStatus::STATE_MUTEX_WAIT);
|
||||
#endif
|
||||
#ifdef COERCE_CONTEXT_SWITCH
|
||||
if (stats_code_ == DB_MUTEX_WAIT_MICROS) {
|
||||
thread_local Random rnd(301);
|
||||
if (rnd.OneIn(2)) {
|
||||
if (bg_cv_) {
|
||||
bg_cv_->SignalAll();
|
||||
}
|
||||
sched_yield();
|
||||
} else {
|
||||
uint32_t sleep_us = rnd.Uniform(11) * 1000;
|
||||
if (bg_cv_) {
|
||||
bg_cv_->SignalAll();
|
||||
}
|
||||
SystemClock::Default()->SleepForMicroseconds(sleep_us);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
mutex_.Lock();
|
||||
}
|
||||
|
@ -32,11 +32,19 @@ class InstrumentedMutex {
|
||||
clock_(clock),
|
||||
stats_code_(stats_code) {}
|
||||
|
||||
#ifdef COERCE_CONTEXT_SWITCH
|
||||
InstrumentedMutex(Statistics* stats, SystemClock* clock, int stats_code,
|
||||
InstrumentedCondVar* bg_cv, bool adaptive = false)
|
||||
: mutex_(adaptive),
|
||||
stats_(stats),
|
||||
clock_(clock),
|
||||
stats_code_(stats_code),
|
||||
bg_cv_(bg_cv) {}
|
||||
#endif
|
||||
|
||||
void Lock();
|
||||
|
||||
void Unlock() {
|
||||
mutex_.Unlock();
|
||||
}
|
||||
void Unlock() { mutex_.Unlock(); }
|
||||
|
||||
void AssertHeld() {
|
||||
mutex_.AssertHeld();
|
||||
@ -49,6 +57,9 @@ class InstrumentedMutex {
|
||||
Statistics* stats_;
|
||||
SystemClock* clock_;
|
||||
int stats_code_;
|
||||
#ifdef COERCE_CONTEXT_SWITCH
|
||||
InstrumentedCondVar* bg_cv_ = nullptr;
|
||||
#endif
|
||||
};
|
||||
|
||||
class ALIGN_AS(CACHE_LINE_SIZE) CacheAlignedInstrumentedMutex
|
||||
|
Loading…
x
Reference in New Issue
Block a user