Attempted workaround for a rare issue where long-running backup processes reach a state in which DNS resolution requests always time out, even though other processes on the same host can still resolve successfully. In case this is somehow caused by a bad boost tcp::resolver state, each request now uses its own tcp::resolver instance.

2025-05-15 10:22:20 +08:00 · 2018-11-26 20:02:03 -08:00 · 2018-11-26 20:02:03 -08:00 · b91b26ef75
commit b91b26ef75
parent 32f434b2ee
1 changed file with 3 additions and 3 deletions
--- a/flow/Net2.actor.cpp
+++ b/flow/Net2.actor.cpp
@@ -158,7 +158,6 @@ public:

 	ASIOReactor reactor;
 	INetworkConnections *network;  // initially this, but can be changed
-	tcp::resolver tcpResolver;

 	int64_t tsc_begin, tsc_end;
 	double taskBegin;
@@ -478,7 +477,6 @@ Net2::Net2(NetworkAddress localAddress, bool useThreadPool, bool useMetrics)
 	: useThreadPool(useThreadPool),
 	  network(this),
 	  reactor(this),
-	  tcpResolver(reactor.ios),
 	  stopped(false),
 	  tasksIssued(0),
 	  // Until run() is called, yield() will always yield
@@ -835,10 +833,11 @@ Future< Reference<IConnection> > Net2::connect( NetworkAddress toAddr, std::stri
 }

 ACTOR static Future<std::vector<NetworkAddress>> resolveTCPEndpoint_impl( Net2 *self, std::string host, std::string service) {
+	state tcp::resolver tcpResolver(self->reactor.ios);
 	Promise<std::vector<NetworkAddress>> promise;
 	state Future<std::vector<NetworkAddress>> result = promise.getFuture();

-	self->tcpResolver.async_resolve(tcp::resolver::query(host, service), [=](const boost::system::error_code &ec, tcp::resolver::iterator iter) {
+	tcpResolver.async_resolve(tcp::resolver::query(host, service), [=](const boost::system::error_code &ec, tcp::resolver::iterator iter) {
 		if(ec) {
 			promise.sendError(lookup_failed());
 			return;
@@ -866,6 +865,7 @@ ACTOR static Future<std::vector<NetworkAddress>> resolveTCPEndpoint_impl( Net2 *
 	});

 	Void _ = wait(ready(result));
+	tcpResolver.cancel();

 	return result.get();
 }