mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-14 18:02:31 +08:00
Improving troubleshooting of stopping the FDB client thread (#8629)
* Upgrade tests: dump thread call stacks of the tester process if it fails to terminate
* ApiTester: log before and after stopping the network thread
* Catch and print exceptions in closeTraceFile; Close trace file at the end of MVC runNetwork
* Change trace event name for MVC runNetwork termination

Co-authored-by: A.J. Beamon <aj.beamon@snowflake.com>
Co-authored-by: A.J. Beamon <aj.beamon@snowflake.com>
This commit is contained in:
parent
9de72eb675
commit
18b852c4e4
@ -459,8 +459,10 @@ int main(int argc, char** argv) {
|
||||
retCode = 1;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Stopping FDB network thread\n");
|
||||
fdb_check(fdb::network::stop(), "Failed to stop FDB thread");
|
||||
network_thread.join();
|
||||
fprintf(stderr, "FDB network thread successfully stopped\n");
|
||||
} catch (const std::exception& err) {
|
||||
fmt::print(stderr, "ERROR: {}\n", err.what());
|
||||
retCode = 1;
|
||||
|
@ -18,6 +18,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "flow/Trace.h"
|
||||
#ifdef ADDRESS_SANITIZER
|
||||
#include <sanitizer/lsan_interface.h>
|
||||
#endif
|
||||
@ -2812,11 +2813,19 @@ void MultiVersionApi::runNetwork() {
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
localClient->api->runNetwork();
|
||||
} catch (const Error& e) {
|
||||
closeTraceFile();
|
||||
throw e;
|
||||
}
|
||||
|
||||
for (auto h : handles) {
|
||||
waitThread(h);
|
||||
}
|
||||
|
||||
TraceEvent("MultiVersionRunNetworkTerminating");
|
||||
closeTraceFile();
|
||||
}
|
||||
|
||||
void MultiVersionApi::stopNetwork() {
|
||||
|
@ -743,10 +743,10 @@ void ThreadSafeApi::runNetwork() {
|
||||
Optional<Error> runErr;
|
||||
try {
|
||||
::runNetwork();
|
||||
} catch (Error& e) {
|
||||
} catch (const Error& e) {
|
||||
TraceEvent(SevError, "RunNetworkError").error(e);
|
||||
runErr = e;
|
||||
} catch (std::exception& e) {
|
||||
} catch (const std::exception& e) {
|
||||
runErr = unknown_error();
|
||||
TraceEvent(SevError, "RunNetworkError").error(unknown_error()).detail("RootException", e.what());
|
||||
} catch (...) {
|
||||
@ -757,9 +757,9 @@ void ThreadSafeApi::runNetwork() {
|
||||
for (auto& hook : threadCompletionHooks) {
|
||||
try {
|
||||
hook.first(hook.second);
|
||||
} catch (Error& e) {
|
||||
} catch (const Error& e) {
|
||||
TraceEvent(SevError, "NetworkShutdownHookError").error(e);
|
||||
} catch (std::exception& e) {
|
||||
} catch (const std::exception& e) {
|
||||
TraceEvent(SevError, "NetworkShutdownHookError").error(unknown_error()).detail("RootException", e.what());
|
||||
} catch (...) {
|
||||
TraceEvent(SevError, "NetworkShutdownHookError").error(unknown_error());
|
||||
@ -767,12 +767,10 @@ void ThreadSafeApi::runNetwork() {
|
||||
}
|
||||
|
||||
if (runErr.present()) {
|
||||
closeTraceFile();
|
||||
throw runErr.get();
|
||||
}
|
||||
|
||||
TraceEvent("RunNetworkTerminating");
|
||||
closeTraceFile();
|
||||
}
|
||||
|
||||
void ThreadSafeApi::stopNetwork() {
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "flow/JsonTraceLogFormatter.h"
|
||||
#include "flow/flow.h"
|
||||
#include "flow/DeterministicRandom.h"
|
||||
#include <exception>
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <cctype>
|
||||
@ -514,6 +515,7 @@ public:
|
||||
|
||||
void close() {
|
||||
if (opened) {
|
||||
try {
|
||||
MutexHolder hold(mutex);
|
||||
|
||||
// Write remaining contents
|
||||
@ -533,6 +535,9 @@ public:
|
||||
f.getBlocking();
|
||||
|
||||
opened = false;
|
||||
} catch (const std::exception& e) {
|
||||
fprintf(stderr, "Error closing trace file: %s\n", e.what());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -280,11 +280,13 @@ class UpgradeTest:
|
||||
os.close(self.ctrl_pipe)
|
||||
|
||||
# Kill the tester process if it is still alive
|
||||
def kill_tester_if_alive(self, workload_thread):
|
||||
def kill_tester_if_alive(self, workload_thread, dump_stacks):
|
||||
if not workload_thread.is_alive():
|
||||
return
|
||||
if self.tester_proc is not None:
|
||||
try:
|
||||
if dump_stacks:
|
||||
os.system("pstack {}".format(self.tester_proc.pid))
|
||||
print("Killing the tester process")
|
||||
self.tester_proc.kill()
|
||||
workload_thread.join(5)
|
||||
@ -310,11 +312,11 @@ class UpgradeTest:
|
||||
except Exception:
|
||||
print("Upgrade test failed")
|
||||
print(traceback.format_exc())
|
||||
self.kill_tester_if_alive(workload_thread)
|
||||
self.kill_tester_if_alive(workload_thread, False)
|
||||
finally:
|
||||
workload_thread.join(5)
|
||||
reader_thread.join(5)
|
||||
self.kill_tester_if_alive(workload_thread)
|
||||
self.kill_tester_if_alive(workload_thread, True)
|
||||
if test_retcode == 0:
|
||||
test_retcode = self.tester_retcode
|
||||
return test_retcode
|
||||
|
Loading…
x
Reference in New Issue
Block a user