Fix SIGHUP handling in scheduler and launcher

SIGHUP's can be dropped in between background worker startup and
the call into the background worker entrypoint. Namely, Postgres
calls `pqsignal(SIGHUP, SIG_IGN)` inside of `StartBackgroundWorker`.
Thus, SIGHUPs will be ignored before the call to the entrypoint.
This creates a possible race condition where a config file change
is not correctly processed (and is instead ignored). We prevent
this by always processing the config file after we set our own
signal handler.

We also fix the tests here in two ways:
1) We disable background workers in this test (or, rather, delete the
line that starts them back up).

2) We put in a new mock timer option that calls the standard timer wait.
This allows us to test proper latch processing in the SIGHUP case.

We believe that this resolves some flakiness in our tests as well.
This commit is contained in:
Matvey Arye 2019-06-26 18:26:56 -04:00 committed by Matvey Arye
parent 1194df58c3
commit 2768c5db3f
8 changed files with 40 additions and 12 deletions

View File

@ -725,6 +725,10 @@ ts_bgw_scheduler_register_signal_handlers(void)
*/
pqsignal(SIGTERM, handle_sigterm);
pqsignal(SIGHUP, handle_sighup);
/* Some SIGHUPS may already have been dropped, so we must load the file here */
got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
}
Datum

View File

@ -115,4 +115,10 @@ ts_timer_set(const Timer *timer)
{
current_timer_implementation = timer;
}
const Timer *
ts_get_standard_timer()
{
return &standard_timer;
}
#endif

View File

@ -24,6 +24,7 @@ extern TSDLLEXPORT TimestampTz ts_timer_get_current_timestamp(void);
#ifdef TS_DEBUG
extern void ts_timer_set(const Timer *timer);
extern const Timer *ts_get_standard_timer(void);
#endif
#endif /* BGW_TIMER_H */

View File

@ -706,6 +706,10 @@ ts_bgw_cluster_launcher_main(PG_FUNCTION_ARGS)
pqsignal(SIGINT, StatementCancelHandler);
pqsignal(SIGTERM, launcher_sigterm);
pqsignal(SIGHUP, launcher_sighup);
/* Some SIGHUPS may already have been dropped, so we must load the file here */
got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
BackgroundWorkerUnblockSignals();
ereport(DEBUG1, (errmsg("TimescaleDB background worker launcher started")));

View File

@ -28,6 +28,7 @@ LANGUAGE C VOLATILE STRICT;
\set WAIT_ON_JOB 0
\set IMMEDIATELY_SET_UNTIL 1
\set WAIT_FOR_OTHER_TO_ADVANCE 2
\set WAIT_FOR_STANDARD_WAITLATCH 3
CREATE OR REPLACE FUNCTION ts_bgw_params_mock_wait_returns_immediately(new_val INTEGER) RETURNS VOID
AS :MODULE_PATHNAME LANGUAGE C VOLATILE;
--allow us to inject test jobs
@ -64,12 +65,6 @@ SELECT _timescaledb_internal.stop_background_workers();
DELETE FROM _timescaledb_config.bgw_job WHERE TRUE;
TRUNCATE _timescaledb_internal.bgw_job_stat;
SELECT _timescaledb_internal.start_background_workers();
start_background_workers
--------------------------
t
(1 row)
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
CREATE TABLE public.bgw_log(
msg_no INT,
@ -587,6 +582,12 @@ DELETE FROM _timescaledb_config.bgw_job;
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
--escalated priv needed for access to pg_stat_activity
\c :TEST_DBNAME :ROLE_SUPERUSER
SELECT ts_bgw_params_mock_wait_returns_immediately(:WAIT_FOR_STANDARD_WAITLATCH);
ts_bgw_params_mock_wait_returns_immediately
---------------------------------------------
(1 row)
SELECT ts_bgw_db_scheduler_test_run(-1);
ts_bgw_db_scheduler_test_run
------------------------------
@ -625,11 +626,10 @@ SELECT ts_bgw_db_scheduler_test_wait_for_scheduler_finish();
(1 row)
SELECT * FROM sorted_bgw_log;
msg_no | mock_time | application_name | msg
--------+---------------------+------------------+--------------------------------------------------------
0 | 0 | DB Scheduler | [TESTING] Wait until 9223372036854775807, started at 0
1 | 9223372036854775807 | DB Scheduler | bgw scheduler stopped due to shutdown_bgw guc
(2 rows)
msg_no | mock_time | application_name | msg
--------+-----------+------------------+-----------------------------------------------
0 | 0 | DB Scheduler | bgw scheduler stopped due to shutdown_bgw guc
(1 row)
ALTER SYSTEM RESET timescaledb.shutdown_bgw_scheduler;
SELECT pg_reload_conf();
@ -650,6 +650,12 @@ SHOW timescaledb.shutdown_bgw_scheduler;
off
(1 row)
SELECT ts_bgw_params_mock_wait_returns_immediately(:WAIT_ON_JOB);
ts_bgw_params_mock_wait_returns_immediately
---------------------------------------------
(1 row)
--Test that sending SIGTERM to scheduler terminates the jobs as well
\c :TEST_DBNAME :ROLE_SUPERUSER
TRUNCATE bgw_log;

View File

@ -37,6 +37,7 @@ LANGUAGE C VOLATILE STRICT;
\set WAIT_ON_JOB 0
\set IMMEDIATELY_SET_UNTIL 1
\set WAIT_FOR_OTHER_TO_ADVANCE 2
\set WAIT_FOR_STANDARD_WAITLATCH 3
CREATE OR REPLACE FUNCTION ts_bgw_params_mock_wait_returns_immediately(new_val INTEGER) RETURNS VOID
AS :MODULE_PATHNAME LANGUAGE C VOLATILE;
@ -73,7 +74,6 @@ $BODY$;
SELECT _timescaledb_internal.stop_background_workers();
DELETE FROM _timescaledb_config.bgw_job WHERE TRUE;
TRUNCATE _timescaledb_internal.bgw_job_stat;
SELECT _timescaledb_internal.start_background_workers();
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
@ -263,6 +263,7 @@ DELETE FROM _timescaledb_config.bgw_job;
--escalated priv needed for access to pg_stat_activity
\c :TEST_DBNAME :ROLE_SUPERUSER
SELECT ts_bgw_params_mock_wait_returns_immediately(:WAIT_FOR_STANDARD_WAITLATCH);
SELECT ts_bgw_db_scheduler_test_run(-1);
SHOW timescaledb.shutdown_bgw_scheduler;
@ -281,6 +282,8 @@ SELECT pg_reload_conf();
SELECT pg_sleep(0.001);
SHOW timescaledb.shutdown_bgw_scheduler;
SELECT ts_bgw_params_mock_wait_returns_immediately(:WAIT_ON_JOB);
--Test that sending SIGTERM to scheduler terminates the jobs as well
\c :TEST_DBNAME :ROLE_SUPERUSER
TRUNCATE bgw_log;

View File

@ -13,6 +13,7 @@ typedef enum MockWaitType
WAIT_ON_JOB = 0,
IMMEDIATELY_SET_UNTIL,
WAIT_FOR_OTHER_TO_ADVANCE,
WAIT_FOR_STANDARD_WAITLATCH,
_MAX_MOCK_WAIT_TYPE
} MockWaitType;

View File

@ -69,6 +69,9 @@ mock_wait(TimestampTz until)
return true;
}
case WAIT_FOR_STANDARD_WAITLATCH:
ts_get_standard_timer()->wait(until);
return true;
default:
return false;
}