diff --git a/fdbcli/DataDistributionCommand.actor.cpp b/fdbcli/DataDistributionCommand.actor.cpp index 1cb667c812..7000bdf5c7 100644 --- a/fdbcli/DataDistributionCommand.actor.cpp +++ b/fdbcli/DataDistributionCommand.actor.cpp @@ -69,7 +69,7 @@ ACTOR Future setDDIgnoreRebalanceSwitch(Reference db, uint8_t D try { state ThreadFuture> resultFuture = tr->get(rebalanceDDIgnoreKey); Optional v = wait(safeThreadFutureToFuture(resultFuture)); - uint8_t oldValue = 0; // nothing is disabled + uint8_t oldValue = DDIgnore::NONE; // nothing is disabled if (v.present()) { if (v.get().size() > 0) { oldValue = BinaryReader::fromStringRef(v.get(), Unversioned()); diff --git a/fdbcli/StatusCommand.actor.cpp b/fdbcli/StatusCommand.actor.cpp index 98e7bdc377..47bbc6e4f8 100644 --- a/fdbcli/StatusCommand.actor.cpp +++ b/fdbcli/StatusCommand.actor.cpp @@ -1128,8 +1128,8 @@ void printStatus(StatusObjectReader statusObj, "storage server failures."; } if (statusObjCluster.has("data_distribution_disabled_for_rebalance")) { - outputString += "\n\nWARNING: Data distribution is currently turned on but shard size balancing is " - "currently disabled."; + outputString += "\n\nWARNING: Data distribution is currently turned on but one or both of shard " + "size and read-load based balancing are disabled."; // data_distribution_disabled_hex if (statusObjCluster.has("data_distribution_disabled_hex")) { outputString += " Ignore code: " + statusObjCluster["data_distribution_disabled_hex"].get_str(); diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index 0222cd9736..a969232c85 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -610,7 +610,7 @@ public: int64_t BYTES_READ_UNITS_PER_SAMPLE; int64_t READ_HOT_SUB_RANGE_CHUNK_SIZE; int64_t EMPTY_READ_PENALTY; - int DD_SHARD_COMPARE_LIMIT; + int DD_SHARD_COMPARE_LIMIT; // when read-aware DD is enabled, at most how many shards are compared together bool READ_SAMPLING_ENABLED; // Storage Server diff --git a/fdbclient/SystemData.cpp b/fdbclient/SystemData.cpp index bd867665ba..163d5f2862 100644 --- a/fdbclient/SystemData.cpp +++ b/fdbclient/SystemData.cpp @@ -302,8 +302,8 @@ std::pair>, std::vector decodeHealthyZoneValue(ValueRef const&); diff --git a/fdbserver/DataDistributionQueue.actor.cpp b/fdbserver/DataDistributionQueue.actor.cpp index 376d61d90e..b8438a9249 100644 --- a/fdbserver/DataDistributionQueue.actor.cpp +++ b/fdbserver/DataDistributionQueue.actor.cpp @@ -1513,6 +1513,7 @@ inline double getWorstCpu(const HealthMetrics& metrics, const std::vector& } else { // assume the server is too busy to report its stats cpu = std::max(cpu, 100.0); + break; } } return cpu; diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 4486cc9921..86e145e342 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -1570,7 +1570,7 @@ struct LoadConfigurationResult { double healthyZoneSeconds; bool rebalanceDDIgnored; // FIXME: possible convert it to int if upgrade value can be resolved? - std::string rebalanceDDIgnoreHex; // any or combination of 0, 1, 2, see enum DDIgnore; + std::string rebalanceDDIgnoreHex; // any or combination of 0, 1, 2, see DDIgnore; bool dataDistributionDisabled; LoadConfigurationResult() diff --git a/fdbserver/TCInfo.actor.cpp b/fdbserver/TCInfo.actor.cpp index a9cf76b95e..41ba999a93 100644 --- a/fdbserver/TCInfo.actor.cpp +++ b/fdbserver/TCInfo.actor.cpp @@ -402,7 +402,7 @@ double TCTeamInfo::getLoadReadBandwidth(bool includeInFlight, double inflightPen } return (size == 0 ? 0 : sum / size) + // we don't need to divide the inflight bandwidth because when added it the bandwidth is from single server - (includeInFlight ? inflightPenalty * getReadInFlightToTeam() : 0); + (includeInFlight ? inflightPenalty * getReadInFlightToTeam() / servers.size() : 0); } int64_t TCTeamInfo::getMinAvailableSpace(bool includeInFlight) const { diff --git a/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp b/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp index 7a5fa2d3eb..a2f7893a6c 100644 --- a/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp +++ b/fdbserver/workloads/SpecialKeySpaceCorrectness.actor.cpp @@ -1275,13 +1275,21 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload { } } // set dd mode to 0 and disable DD for rebalance + state uint8_t ddIgnoreValue = DDIgnore::NONE; + if (deterministicRandom()->coinflip()) { + ddIgnoreValue |= DDIgnore::REBALANCE_READ; + } + if (deterministicRandom()->coinflip()) { + ddIgnoreValue |= DDIgnore::REBALANCE_DISK; + } loop { try { tx->setOption(FDBTransactionOptions::RAW_ACCESS); tx->setOption(FDBTransactionOptions::SPECIAL_KEY_SPACE_ENABLE_WRITES); KeyRef ddPrefix = SpecialKeySpace::getManagementApiCommandPrefix("datadistribution"); tx->set(LiteralStringRef("mode").withPrefix(ddPrefix), LiteralStringRef("0")); - tx->set(LiteralStringRef("rebalance_ignored").withPrefix(ddPrefix), Value()); + tx->set(LiteralStringRef("rebalance_ignored").withPrefix(ddPrefix), + BinaryWriter::toValue(ddIgnoreValue, Unversioned())); wait(tx->commit()); tx->reset(); break; @@ -1306,8 +1314,8 @@ struct SpecialKeySpaceCorrectnessWorkload : TestWorkload { ASSERT(BinaryReader::fromStringRef(val2.get(), Unversioned()) == 0); // check DD disabled for rebalance Optional val3 = wait(tx->get(rebalanceDDIgnoreKey)); - // default value "on" - ASSERT(val3.present()); + ASSERT(val3.present() && + BinaryReader::fromStringRef(val3.get(), Unversioned()) == ddIgnoreValue); tx->reset(); break; } catch (Error& e) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 64cf08cc8c..5426657bb1 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -186,7 +186,6 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES fast/WriteDuringRead.toml) add_fdb_test(TEST_FILES fast/WriteDuringReadClean.toml) add_fdb_test(TEST_FILES noSim/RandomUnitTests.toml UNIT) - add_fdb_test(TEST_FILES noSim/ReadSkewReadWrite.toml IGNORE) if (WITH_ROCKSDB_EXPERIMENTAL) add_fdb_test(TEST_FILES noSim/KeyValueStoreRocksDBTest.toml) add_fdb_test(TEST_FILES fast/PhysicalShardMove.toml) @@ -211,6 +210,7 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES rare/LargeApiCorrectnessStatus.toml) add_fdb_test(TEST_FILES rare/RYWDisable.toml) add_fdb_test(TEST_FILES rare/RandomReadWriteTest.toml) + add_fdb_test(TEST_FILES rare/ReadSkewReadWrite.toml) add_fdb_test(TEST_FILES rare/SpecificUnitTests.toml) add_fdb_test(TEST_FILES rare/SwizzledLargeApiCorrectness.toml) add_fdb_test(TEST_FILES rare/RedwoodCorrectnessBTree.toml) diff --git a/tests/noSim/ReadSkewReadWrite.toml b/tests/noSim/ReadSkewReadWrite.toml deleted file mode 100644 index 4542ef46d3..0000000000 --- a/tests/noSim/ReadSkewReadWrite.toml +++ /dev/null @@ -1,24 +0,0 @@ -[[test]] -testTitle = 'RandomReadWriteTest' -connectionFailuresDisableDuration = 100000 -# waitForQuiescenceBegin= false -# waitForQuiescenceEnd=false -clearAfterTest = false #true -runSetup = true # false -timeout = 3600.0 - -[[test.workload]] -testName = 'ReadWrite' -transactionsPerSecond = 100000 -testDuration = 900.0 -skewRound = 1 -nodeCount = 30000000 -valueBytes = 1000 -readsPerTransactionA = 8 -writesPerTransactionA = 0 -alpha = 0 -discardEdgeMeasurements = false -hotServerFraction = 0.2 -hotServerReadFrac = 0.8 -# hotServerShardFraction = 0.3 -warmingDelay = 180.0 \ No newline at end of file diff --git a/tests/rare/ReadSkewReadWrite.toml b/tests/rare/ReadSkewReadWrite.toml index 0c95c78c75..31d037e8a3 100644 --- a/tests/rare/ReadSkewReadWrite.toml +++ b/tests/rare/ReadSkewReadWrite.toml @@ -1,15 +1,24 @@ [[test]] testTitle = 'RandomReadWriteTest' -simCheckRelocationDuration = true connectionFailuresDisableDuration = 100000 +waitForQuiescenceBegin= false +waitForQuiescenceEnd=false +clearAfterTest = true +runSetup = true # false +timeout = 3600.0 - [[test.workload]] - testName = 'ReadWrite' - testDuration = 30.0 - skewRound = 1 - transactionsPerSecond = 2000 - nodeCount = 150000 - valueBytes = 128 - discardEdgeMeasurements = false - warmingDelay = 10.0 - hotServerFraction = 0.1 \ No newline at end of file +[[test.workload]] +testName = 'ReadWrite' +transactionsPerSecond = 100000 +testDuration = 400.0 +skewRound = 1 +nodeCount = 30000 # 30000000 +valueBytes = 100 +readsPerTransactionA = 8 +writesPerTransactionA = 0 +alpha = 0 +discardEdgeMeasurements = false +hotServerFraction = 0.2 +hotServerReadFrac = 0.8 +# hotServerShardFraction = 0.3 +warmingDelay = 180.0 \ No newline at end of file