Do not pick SS with a colocated LR in ExcludeIncludeStorageServersWorkload (#11980)

This commit is contained in:
Syed Paymaan Raza 2025-02-27 15:23:32 -08:00 committed by GitHub
parent 9f094417a2
commit 7642ead228
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -30,8 +30,8 @@
// This test creates a scenario in which a large number of SS join the cluster, by excluding and including a random // This test creates a scenario in which a large number of SS join the cluster, by excluding and including a random
// SS repeatedly, many times. // SS repeatedly, many times.
// this test would quit early in two scenarios: // this test would quit early in two scenarios:
// 1. QuitEarlyNoEligibleSSToExclude: All storage servers are running alongside with a TLog, thus we cannot // 1. QuitEarlyNoEligibleSSToExclude: All storage servers are running alongside with a TLog or LogRouter, thus we
// exclude a SS, otherwise the DB would be unavailable. (This is because simulation uses the UNSET process class.) // cannot exclude a SS, otherwise the DB would be unavailable. (This is because simulation uses the UNSET process class.)
// 2. QuitEarlyNotCompleteServerExclude: Sometimes it takes too long for a SS exclusion to finish(disappear from // 2. QuitEarlyNotCompleteServerExclude: Sometimes it takes too long for a SS exclusion to finish(disappear from
// serverListKeys), there is a timeout_error when that happens, and we quit if it never succeeded( i.e.) // serverListKeys), there is a timeout_error when that happens, and we quit if it never succeeded( i.e.)
// It makes sense because the purpose of this test is to : // It makes sense because the purpose of this test is to :
@ -107,13 +107,25 @@ struct ExcludeIncludeStorageServersWorkload : TestWorkload {
} }
} }
// get all TLogs // get all TLogs and remove from SS candidate set
Optional<Standalone<StringRef>> value = wait(tr.get(logsKey)); Optional<Standalone<StringRef>> value = wait(tr.get(logsKey));
ASSERT(value.present()); ASSERT(value.present());
auto logs = decodeLogsValue(value.get()); auto logs = decodeLogsValue(value.get());
for (auto const& log : logs.first) { for (auto const& log : logs.first) {
servers.erase(AddressExclusion(log.second.ip, log.second.port)); servers.erase(AddressExclusion(log.second.ip, log.second.port));
} }
// get all log routers and remove from SS candidate set
for (const auto& tLogSet : self->dbInfo->get().logSystemConfig.tLogs) {
for (const auto& logRouter : tLogSet.logRouters) {
if (logRouter.present()) {
const auto& logRouterInterf = logRouter.interf();
servers.erase(
AddressExclusion(logRouterInterf.address().ip, logRouterInterf.address().port));
}
}
}
if (servers.empty()) { if (servers.empty()) {
// sometimes all SS are running alongside a TLog, cannot exclude any of them, so quit // sometimes all SS are running alongside a TLog or a LogRouter, so none of them can be excluded; quit early
TraceEvent("QuitEarlyNoEligibleSSToExclude").log(); TraceEvent("QuitEarlyNoEligibleSSToExclude").log();