Revert "Restart scheduler on error"

This reverts commit bebd1ab42940aae7ee4817621f1b498788704867. We have
discovered that the background worker (BGW) slot is not freed in all
cases. When that happens, no new workers can be created. The patch is
therefore rolled back until the bug has been fixed.
This commit is contained in:
Jan Nidzwetzki 2023-11-21 14:19:41 +01:00 committed by Jan Nidzwetzki
parent 747f4e2bfd
commit d17b58206b
10 changed files with 4 additions and 183 deletions

View File

@ -1 +0,0 @@
Implements: #6195 Restart scheduler on error

View File

@ -828,7 +828,6 @@ ts_bgw_scheduler_process(int32 run_for_interval_ms,
wait_for_all_jobs_to_shutdown(); wait_for_all_jobs_to_shutdown();
check_for_stopped_and_timed_out_jobs(); check_for_stopped_and_timed_out_jobs();
proc_exit(ts_bgw_scheduler_exit_code);
} }
static void static void

View File

@ -123,20 +123,6 @@ TSDLLEXPORT int ts_guc_hypertable_replication_factor_default = 1;
bool ts_guc_debug_require_batch_sorted_merge = false; bool ts_guc_debug_require_batch_sorted_merge = false;
/*
* Exit code for the scheduler.
*
* Normally it exits with a zero which means that it will not restart. If an
* error is raised, it exits with error code 1, which will trigger a
* restart.
*
* This variable exists to be able to trigger a restart for a normal exit,
* which is useful when debugging.
*
* See backend/postmaster/bgworker.c
*/
int ts_bgw_scheduler_exit_code = 0;
#ifdef TS_DEBUG #ifdef TS_DEBUG
bool ts_shutdown_bgw = false; bool ts_shutdown_bgw = false;
char *ts_current_timestamp_mock = NULL; char *ts_current_timestamp_mock = NULL;
@ -786,19 +772,6 @@ _guc_init(void)
/* assign_hook= */ NULL, /* assign_hook= */ NULL,
/* show_hook= */ NULL); /* show_hook= */ NULL);
DefineCustomIntVariable(/* name= */ "timescaledb.shutdown_bgw_scheduler_exit_code",
/* short_desc= */ "exit code to use when shutting down the scheduler",
/* long_desc= */ "this is for debugging purposes",
/* valueAddr= */ &ts_bgw_scheduler_exit_code,
/* bootValue= */ 0,
/* minValue= */ 0,
/* maxValue= */ 255,
/* context= */ PGC_SIGHUP,
/* flags= */ 0,
/* check_hook= */ NULL,
/* assign_hook= */ NULL,
/* show_hook= */ NULL);
DefineCustomStringVariable(/* name= */ "timescaledb.current_timestamp_mock", DefineCustomStringVariable(/* name= */ "timescaledb.current_timestamp_mock",
/* short_desc= */ "set the current timestamp", /* short_desc= */ "set the current timestamp",
/* long_desc= */ "this is for debugging purposes", /* long_desc= */ "this is for debugging purposes",

View File

@ -100,14 +100,6 @@ extern TSDLLEXPORT DistCopyTransferFormat ts_guc_dist_copy_transfer_format;
typedef void (*set_ssl_options_hook_type)(const char *user_name); typedef void (*set_ssl_options_hook_type)(const char *user_name);
extern TSDLLEXPORT set_ssl_options_hook_type ts_set_ssl_options_hook; extern TSDLLEXPORT set_ssl_options_hook_type ts_set_ssl_options_hook;
/*
* Exit code to use when scheduler exits.
*
* Mostly used for debugging, but defined also for non-debug builds since that
* simplifies the code (and also simplifies debugging non-debug builds).
*/
extern TSDLLEXPORT int ts_bgw_scheduler_exit_code;
#ifdef TS_DEBUG #ifdef TS_DEBUG
extern bool ts_shutdown_bgw; extern bool ts_shutdown_bgw;
extern char *ts_current_timestamp_mock; extern char *ts_current_timestamp_mock;

View File

@ -84,8 +84,6 @@ typedef enum SchedulerState
static volatile sig_atomic_t got_SIGHUP = false; static volatile sig_atomic_t got_SIGHUP = false;
int ts_guc_bgw_scheduler_restart_time_sec = 30;
static void static void
launcher_sighup(SIGNAL_ARGS) launcher_sighup(SIGNAL_ARGS)
{ {
@ -240,24 +238,10 @@ terminate_background_worker(BackgroundWorkerHandle *handle)
} }
extern void extern void
ts_bgw_cluster_launcher_init(void) ts_bgw_cluster_launcher_register(void)
{ {
BackgroundWorker worker; BackgroundWorker worker;
DefineCustomIntVariable(/* name= */ "timescaledb.bgw_scheduler_restart_time",
/* short_desc= */ "Restart time for scheduler in seconds",
/* long_desc= */
"The number of seconds until the scheduler restart on failure.",
/* valueAddr= */ &ts_guc_bgw_scheduler_restart_time_sec,
/* bootValue= */ 30,
/* minValue= */ 1,
/* maxValue= */ 3600,
/* context= */ PGC_POSTMASTER,
/* flags= */ GUC_UNIT_S,
/* check_hook= */ NULL,
/* assign_hook= */ NULL,
/* show_hook= */ NULL);
memset(&worker, 0, sizeof(worker)); memset(&worker, 0, sizeof(worker));
/* set up worker settings for our main worker */ /* set up worker settings for our main worker */
snprintf(worker.bgw_name, BGW_MAXLEN, "TimescaleDB Background Worker Launcher"); snprintf(worker.bgw_name, BGW_MAXLEN, "TimescaleDB Background Worker Launcher");
@ -292,7 +276,7 @@ register_entrypoint_for_db(Oid db_id, VirtualTransactionId vxid, BackgroundWorke
memset(&worker, 0, sizeof(worker)); memset(&worker, 0, sizeof(worker));
snprintf(worker.bgw_name, BGW_MAXLEN, "TimescaleDB Background Worker Scheduler"); snprintf(worker.bgw_name, BGW_MAXLEN, "TimescaleDB Background Worker Scheduler");
worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
worker.bgw_restart_time = ts_guc_bgw_scheduler_restart_time_sec, worker.bgw_restart_time = BGW_NEVER_RESTART;
worker.bgw_start_time = BgWorkerStart_RecoveryFinished; worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
snprintf(worker.bgw_library_name, BGW_MAXLEN, EXTENSION_NAME); snprintf(worker.bgw_library_name, BGW_MAXLEN, EXTENSION_NAME);
snprintf(worker.bgw_function_name, BGW_MAXLEN, BGW_ENTRYPOINT_FUNCNAME); snprintf(worker.bgw_function_name, BGW_MAXLEN, BGW_ENTRYPOINT_FUNCNAME);

View File

@ -10,9 +10,7 @@
#include <postgres.h> #include <postgres.h>
#include <fmgr.h> #include <fmgr.h>
extern int ts_guc_bgw_scheduler_restart_time_sec; extern void ts_bgw_cluster_launcher_register(void);
extern void ts_bgw_cluster_launcher_init(void);
/*called by postmaster at launcher bgw startup*/ /*called by postmaster at launcher bgw startup*/
TSDLLEXPORT extern Datum ts_bgw_cluster_launcher_main(PG_FUNCTION_ARGS); TSDLLEXPORT extern Datum ts_bgw_cluster_launcher_main(PG_FUNCTION_ARGS);

View File

@ -711,7 +711,7 @@ _PG_init(void)
timescaledb_shmem_request_hook(); timescaledb_shmem_request_hook();
#endif #endif
ts_bgw_cluster_launcher_init(); ts_bgw_cluster_launcher_register();
ts_bgw_counter_setup_gucs(); ts_bgw_counter_setup_gucs();
ts_bgw_interface_register_api_version(); ts_bgw_interface_register_api_version();
ts_seclabel_init(); ts_seclabel_init();

View File

@ -1,83 +0,0 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\c :TEST_DBNAME :ROLE_SUPERUSER
CREATE VIEW tsdb_bgw AS
SELECT datname, application_name FROM pg_stat_activity
WHERE application_name LIKE 'TimescaleDB%'
ORDER BY datname, application_name;
SHOW timescaledb.bgw_scheduler_restart_time;
timescaledb.bgw_scheduler_restart_time
----------------------------------------
30s
(1 row)
SELECT _timescaledb_functions.start_background_workers();
start_background_workers
--------------------------
t
(1 row)
SELECT pg_sleep(10); -- Wait for scheduler to start.
pg_sleep
----------
(1 row)
SELECT * FROM tsdb_bgw;
datname | application_name
--------------------------+-----------------------------------------
db_bgw_scheduler_restart | TimescaleDB Background Worker Scheduler
| TimescaleDB Background Worker Launcher
(2 rows)
ALTER SYSTEM SET timescaledb.shutdown_bgw_scheduler TO 'on';
ALTER SYSTEM SET timescaledb.shutdown_bgw_scheduler_exit_code TO 1;
SELECT pg_reload_conf();
pg_reload_conf
----------------
t
(1 row)
SELECT pg_sleep(20); -- Wait for scheduler to exit.
pg_sleep
----------
(1 row)
SELECT * FROM tsdb_bgw;
datname | application_name
---------+----------------------------------------
| TimescaleDB Background Worker Launcher
(1 row)
ALTER SYSTEM RESET timescaledb.shutdown_bgw_scheduler;
ALTER SYSTEM RESET timescaledb.shutdown_bgw_scheduler_exit_code;
SELECT pg_reload_conf();
pg_reload_conf
----------------
t
(1 row)
SELECT pg_sleep(30); -- Wait for scheduler to restart.
pg_sleep
----------
(1 row)
SELECT * FROM tsdb_bgw;
datname | application_name
--------------------------+-----------------------------------------
db_bgw_scheduler_restart | TimescaleDB Background Worker Scheduler
| TimescaleDB Background Worker Launcher
(2 rows)
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE datname = :'TEST_DBNAME'
AND application_name LIKE 'TimescaleDB%';
pg_terminate_backend
----------------------
t
(1 row)

View File

@ -47,7 +47,6 @@ if(CMAKE_BUILD_TYPE MATCHES Debug)
TEST_FILES TEST_FILES
bgw_db_scheduler.sql bgw_db_scheduler.sql
bgw_scheduler_control.sql bgw_scheduler_control.sql
bgw_scheduler_restart.sql
job_errors_permissions.sql job_errors_permissions.sql
troubleshooting_job_errors.sql troubleshooting_job_errors.sql
bgw_db_scheduler_fixed.sql bgw_db_scheduler_fixed.sql
@ -141,7 +140,6 @@ set(SOLO_TESTS
# This interferes with other tests since it reloads the config to increase # This interferes with other tests since it reloads the config to increase
# log level. # log level.
bgw_scheduler_control bgw_scheduler_control
bgw_scheduler_restart
bgw_db_scheduler bgw_db_scheduler
job_errors_permissions job_errors_permissions
troubleshooting_job_errors troubleshooting_job_errors

View File

@ -1,39 +0,0 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\c :TEST_DBNAME :ROLE_SUPERUSER
CREATE VIEW tsdb_bgw AS
SELECT datname, application_name FROM pg_stat_activity
WHERE application_name LIKE 'TimescaleDB%'
ORDER BY datname, application_name;
SHOW timescaledb.bgw_scheduler_restart_time;
SELECT _timescaledb_functions.start_background_workers();
SELECT pg_sleep(10); -- Wait for scheduler to start.
SELECT * FROM tsdb_bgw;
ALTER SYSTEM SET timescaledb.shutdown_bgw_scheduler TO 'on';
ALTER SYSTEM SET timescaledb.shutdown_bgw_scheduler_exit_code TO 1;
SELECT pg_reload_conf();
SELECT pg_sleep(20); -- Wait for scheduler to exit.
SELECT * FROM tsdb_bgw;
ALTER SYSTEM RESET timescaledb.shutdown_bgw_scheduler;
ALTER SYSTEM RESET timescaledb.shutdown_bgw_scheduler_exit_code;
SELECT pg_reload_conf();
SELECT pg_sleep(30); -- Wait for scheduler to restart.
SELECT * FROM tsdb_bgw;
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE datname = :'TEST_DBNAME'
AND application_name LIKE 'TimescaleDB%';