diff --git a/tsl/src/remote/dist_txn.c b/tsl/src/remote/dist_txn.c index daf66aeed..c8371f8be 100644 --- a/tsl/src/remote/dist_txn.c +++ b/tsl/src/remote/dist_txn.c @@ -146,7 +146,16 @@ dist_txn_xact_callback_1pc_pre_commit() dist_txn_deallocate_prepared_stmts_if_needed(); } -/* On abort on the frontend send aborts to all of the remote endpoints */ +/* + * Abort on the access node. + * + * The access node needs to send aborts to all of the remote endpoints. This + * code should not throw errors itself, since we are already in abort due to a + * previous error. Instead, we try to emit errors as warnings. For safety, we + * should probaby try-catch and swallow any potential lower-layer errors given + * that we're doing remote calls over the network. But the semantics for + * capturing and proceeding after such recursive errors are unclear. + */ static void dist_txn_xact_callback_abort() { @@ -493,15 +502,19 @@ dist_txn_subxact_callback(SubXactEvent event, SubTransactionId mySubid, if (store == NULL) return; - /* Nothing to do at subxact start, nor after commit. */ - if (!(event == SUBXACT_EVENT_PRE_COMMIT_SUB || event == SUBXACT_EVENT_ABORT_SUB)) - return; - - if (event == SUBXACT_EVENT_PRE_COMMIT_SUB) - reject_transactions_with_incomplete_transitions(); - - if (event == SUBXACT_EVENT_ABORT_SUB) - eventcallback(DTXN_EVENT_SUB_XACT_ABORT); + switch (event) + { + case SUBXACT_EVENT_START_SUB: + case SUBXACT_EVENT_COMMIT_SUB: + /* Nothing to do at subxact start, nor after commit. */ + return; + case SUBXACT_EVENT_PRE_COMMIT_SUB: + reject_transactions_with_incomplete_transitions(); + break; + case SUBXACT_EVENT_ABORT_SUB: + eventcallback(DTXN_EVENT_SUB_XACT_ABORT); + break; + } curlevel = GetCurrentTransactionNestLevel(); diff --git a/tsl/src/remote/dist_txn.h b/tsl/src/remote/dist_txn.h index 90e28d471..3e8c689d2 100644 --- a/tsl/src/remote/dist_txn.h +++ b/tsl/src/remote/dist_txn.h @@ -45,7 +45,7 @@ extern void remote_dist_txn_set_event_handler(const DistTransactionEventHandler extern DistTransactionEvent remote_dist_txn_event_from_name(const char *eventname); extern const char *remote_dist_txn_event_name(const DistTransactionEvent event); -#endif +#endif /* DEBUG */ void _remote_dist_txn_init(void); void _remote_dist_txn_fini(void); diff --git a/tsl/src/remote/txn.c b/tsl/src/remote/txn.c index 2a96af818..d12fb6f61 100644 --- a/tsl/src/remote/txn.c +++ b/tsl/src/remote/txn.c @@ -3,6 +3,7 @@ * Please see the included NOTICE for copyright information and * LICENSE-TIMESCALE for a copy of the license. */ +#include "libpq-fe.h" #include #include #include @@ -299,28 +300,35 @@ remote_txn_check_for_leaked_prepared_statements(RemoteTxn *entry) { PGresult *res; char *count_string; + ExecStatusType status; if (PQTRANS_IDLE != PQtransactionStatus(remote_connection_get_pg_conn(entry->conn))) return; res = remote_connection_exec(entry->conn, "SELECT count(*) FROM pg_prepared_statements"); - if (PQresultStatus(res) == PGRES_TUPLES_OK) + status = PQresultStatus(res); + + switch (status) { - if (PQntuples(res) == 1 && PQnfields(res) == 1) - { - count_string = PQgetvalue(res, 0, 0); - if (strcmp("0", count_string) != 0) - elog(WARNING, "leak check: connection leaked prepared statement"); - } - else - { - remote_result_close(res); - elog(ERROR, "leak check: incorrect number of rows or columns returned"); - } + case PGRES_TUPLES_OK: + if (PQntuples(res) == 1 && PQnfields(res) == 1) + { + count_string = PQgetvalue(res, 0, 0); + if (strcmp("0", count_string) != 0) + elog(WARNING, "leak check: connection leaked prepared statement"); + } + else + elog(ERROR, "leak check: unexpected number of rows or columns returned"); + break; + case PGRES_FATAL_ERROR: + case PGRES_NONFATAL_ERROR: + elog(WARNING, "leak check: ERROR [\"%s\"]", PQresultErrorMessage(res)); + break; + default: + elog(WARNING, "leak check: unexpected result state %u", status); + break; } - else - elog(WARNING, "leak check: unexpected result \"%s\"", PQresultErrorMessage(res)); remote_result_close(res); } diff --git a/tsl/test/src/remote/connection.c b/tsl/test/src/remote/connection.c index 8b54f8a01..7b378ce99 100644 --- a/tsl/test/src/remote/connection.c +++ b/tsl/test/src/remote/connection.c @@ -25,10 +25,6 @@ #include "test_utils.h" #include "connection.h" -static const char *sql_get_backend_pid = "SELECT pg_backend_pid()"; -static const char *sql_get_application_name = - "SELECT application_name from pg_stat_activity where pid = pg_backend_pid()"; - TSConnection * get_connection() { @@ -250,10 +246,10 @@ remote_connection_get_remote_pid(const TSConnection *conn) char *pid_string; unsigned long pid_long; - res = PQexec(remote_connection_get_pg_conn(conn), sql_get_backend_pid); + res = PQexec(remote_connection_get_pg_conn(conn), "SELECT pg_backend_pid()"); if (PQresultStatus(res) != PGRES_TUPLES_OK) - return 0; + return -1; Assert(1 == PQntuples(res)); Assert(1 == PQnfields(res)); @@ -271,7 +267,10 @@ remote_connection_get_application_name(const TSConnection *conn) PGresult *res; char *app_name; - res = PQexec(remote_connection_get_pg_conn(conn), sql_get_application_name); + res = PQexec(remote_connection_get_pg_conn(conn), + "SELECT application_name " + "FROM pg_stat_activity " + "WHERE pid = pg_backend_pid()"); if (PQresultStatus(res) != PGRES_TUPLES_OK) return 0; diff --git a/tsl/test/src/remote/node_killer.c b/tsl/test/src/remote/node_killer.c index f45e559c1..225b6891a 100644 --- a/tsl/test/src/remote/node_killer.c +++ b/tsl/test/src/remote/node_killer.c @@ -51,9 +51,14 @@ void remote_node_killer_init(RemoteNodeKiller *rnk, const TSConnection *conn, const DistTransactionEvent event) { + int remote_pid = remote_connection_get_remote_pid(conn); + + if (remote_pid == -1) + elog(ERROR, "could not get PID of remote backend process"); + MemSet(rnk, 0, sizeof(*rnk)); rnk->conn = conn; - rnk->pid = remote_connection_get_remote_pid(conn); + rnk->pid = remote_pid; rnk->killevent = event; } @@ -118,10 +123,12 @@ remote_node_killer_kill(RemoteNodeKiller *rnk) elog(WARNING, "timeout while waiting for killed backend to exit"); break; } - wait_count--; pg_usleep(100L); } + /* Once PG registered the backend as gone, wait some additional time + * for it to really exit */ + pg_usleep(500L); } }