mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-15 18:13:18 +08:00
This PR changes the launcher to use a state machine to keep track of the state of each database scheduler. Further, it adds polling to go through the list of databases and check their states. This solves several issues: 1) A CREATE DATABASE call using a template that already has TimescaleDB installed previously did not start a scheduler until the next database restart. A test for this case has been added. 2) A lack of available slots or background workers when a new database was added meant that the scheduler would not be started until the next database restart. Now this will be retried on every polling event. This PR also simplifies logic since database entries are never removed from the hash table and thus never added more than once. State transitions are now easier to read and reason about. Documentation for the state transitions has been added.
416 lines
13 KiB
Plaintext
416 lines
13 KiB
Plaintext
\c single_2 :ROLE_SUPERUSER
|
|
\ir include/bgw_launcher_utils.sql
|
|
/*
|
|
* Note on testing: need a couple wrappers that pg_sleep in a loop to wait for changes
|
|
* to appear in pg_stat_activity.
|
|
* Further Note: on PG 9.6 the launcher doesn't actually show up in pg_stat_activity (background workers are only listed there from PG 10 on).
|
|
* we can still test its interactions with its children, but can't test some of the things specific to the launcher.
|
|
* So we've added some bits about the version number as needed.
|
|
*/
|
|
/* Single-row summary of pg_stat_activity: how many launcher backends exist and
 * how many scheduler backends are attached to each database these tests use. */
CREATE VIEW worker_counts AS
SELECT
    count(*) FILTER (WHERE application_name = 'TimescaleDB Background Worker Launcher') AS launcher,
    count(*) FILTER (WHERE datname = 'single' AND application_name = 'TimescaleDB Background Worker Scheduler') AS single_scheduler,
    count(*) FILTER (WHERE datname = 'single_2' AND application_name = 'TimescaleDB Background Worker Scheduler') AS single_2_scheduler,
    count(*) FILTER (WHERE datname = 'template1' AND application_name = 'TimescaleDB Background Worker Scheduler') AS template1_scheduler
FROM pg_stat_activity;
|
|
/*
 * Poll worker_counts until it reports exactly the requested number of launcher
 * and scheduler backends.
 *
 *   launcher_ct   - expected launcher count (not checked when server_version_num < 100000)
 *   scheduler1_ct - expected scheduler count for database 'single'
 *   scheduler2_ct - expected scheduler count for database 'single_2'
 *   template1_ct  - expected scheduler count for database 'template1'
 *
 * Returns TRUE as soon as the counts match, FALSE after 10 unsuccessful polls
 * spaced 0.1 seconds apart.
 */
CREATE FUNCTION wait_worker_counts(launcher_ct INTEGER, scheduler1_ct INTEGER, scheduler2_ct INTEGER, template1_ct INTEGER) RETURNS BOOLEAN LANGUAGE PLPGSQL AS
$BODY$
BEGIN
    FOR attempt IN 1..10
    LOOP
        IF EXISTS (
            SELECT 1
            FROM worker_counts
            WHERE (launcher = launcher_ct OR current_setting('server_version_num')::int < 100000)
            AND single_scheduler = scheduler1_ct
            AND single_2_scheduler = scheduler2_ct
            AND template1_scheduler = template1_ct
        ) THEN
            --We have the correct counts!
            RETURN TRUE;
        END IF;

        -- Not there yet: pause, then drop the cached stats snapshot so the
        -- next poll reads fresh pg_stat_activity contents.
        PERFORM pg_sleep(0.1);
        PERFORM pg_stat_clear_snapshot();
    END LOOP;

    RETURN FALSE;
END
$BODY$;
|
|
/*
|
|
* When we've connected to single_2, we should be able to see the cluster launcher
|
|
* and the scheduler for single in pg_stat_activity
|
|
* but single_2 shouldn't have a scheduler because ext not created yet
|
|
*/
|
|
SELECT wait_worker_counts(1,1,0,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
/*Now create the extension in single_2*/
|
|
SET client_min_messages = ERROR;
|
|
CREATE EXTENSION timescaledb CASCADE;
|
|
RESET client_min_messages;
|
|
SELECT wait_worker_counts(1,1,1,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
DROP DATABASE single;
|
|
/* Now the db_scheduler for single should have disappeared*/
|
|
SELECT wait_worker_counts(1,0,1,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
/*Now let's restart the scheduler and make sure our backend_start changed */
|
|
SELECT backend_start as orig_backend_start
|
|
FROM pg_stat_activity
|
|
WHERE application_name = 'TimescaleDB Background Worker Scheduler'
|
|
AND datname = 'single_2' \gset
|
|
/* We'll do this in a txn so that we can see that the worker locks on our txn before continuing*/
|
|
BEGIN;
|
|
SELECT _timescaledb_internal.restart_background_workers();
|
|
restart_background_workers
|
|
----------------------------
|
|
t
|
|
(1 row)
|
|
|
|
SELECT wait_worker_counts(1,0,1,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
SELECT (backend_start > :'orig_backend_start'::timestamptz) backend_start_changed,
|
|
(wait_event = 'virtualxid') wait_event_changed
|
|
FROM pg_stat_activity
|
|
WHERE application_name = 'TimescaleDB Background Worker Scheduler'
|
|
AND datname = 'single_2';
|
|
backend_start_changed | wait_event_changed
|
|
-----------------------+--------------------
|
|
t | t
|
|
(1 row)
|
|
|
|
COMMIT;
|
|
SELECT wait_worker_counts(1,0,1,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
SELECT (wait_event IS DISTINCT FROM 'virtualxid') wait_event_changed
|
|
FROM pg_stat_activity
|
|
WHERE application_name = 'TimescaleDB Background Worker Scheduler'
|
|
AND datname = 'single_2';
|
|
wait_event_changed
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
/*Test stop*/
|
|
SELECT _timescaledb_internal.stop_background_workers();
|
|
stop_background_workers
|
|
-------------------------
|
|
t
|
|
(1 row)
|
|
|
|
SELECT wait_worker_counts(1,0,0,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
/*Make sure it doesn't break if we stop twice in a row*/
|
|
SELECT _timescaledb_internal.stop_background_workers();
|
|
stop_background_workers
|
|
-------------------------
|
|
t
|
|
(1 row)
|
|
|
|
SELECT wait_worker_counts(1,0,0,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
/*test start*/
|
|
SELECT _timescaledb_internal.start_background_workers();
|
|
start_background_workers
|
|
--------------------------
|
|
t
|
|
(1 row)
|
|
|
|
SELECT wait_worker_counts(1,0,1,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
/*make sure start is idempotent*/
|
|
SELECT backend_start as orig_backend_start
|
|
FROM pg_stat_activity
|
|
WHERE application_name = 'TimescaleDB Background Worker Scheduler'
|
|
AND datname = 'single_2' \gset
|
|
/* Since we're doing idempotency tests, we're also going to exercise our queue and start 20 times*/
|
|
SELECT _timescaledb_internal.start_background_workers() as start_background_workers, * FROM generate_series(1,20);
|
|
start_background_workers | generate_series
|
|
--------------------------+-----------------
|
|
t | 1
|
|
t | 2
|
|
t | 3
|
|
t | 4
|
|
t | 5
|
|
t | 6
|
|
t | 7
|
|
t | 8
|
|
t | 9
|
|
t | 10
|
|
t | 11
|
|
t | 12
|
|
t | 13
|
|
t | 14
|
|
t | 15
|
|
t | 16
|
|
t | 17
|
|
t | 18
|
|
t | 19
|
|
t | 20
|
|
(20 rows)
|
|
|
|
/* Check that the single_2 scheduler's backend_start stays equal to the given
 * timestamp. Unlike the other wait helpers, this loop keeps polling while the
 * condition holds and gives up as soon as it does not, so a passing run always
 * performs all five polls. That is about half a second in total, which keeps
 * the test from taking too long. */
CREATE FUNCTION wait_equals(TIMESTAMPTZ) RETURNS BOOLEAN LANGUAGE PLPGSQL AS
$BODY$
DECLARE
    unchanged BOOLEAN;
BEGIN
    FOR attempt IN 1..5
    LOOP
        SELECT (backend_start = $1::timestamptz)
        INTO unchanged
        FROM pg_stat_activity
        WHERE application_name = 'TimescaleDB Background Worker Scheduler'
        AND datname = 'single_2';

        -- Anything other than a definite TRUE (FALSE, or NULL when no
        -- scheduler row was found) ends the wait with a negative answer.
        IF unchanged IS NOT TRUE THEN
            RETURN FALSE;
        END IF;

        PERFORM pg_sleep(0.1);
        PERFORM pg_stat_clear_snapshot();
    END LOOP;

    RETURN TRUE;
END
$BODY$;
|
|
select wait_equals(:'orig_backend_start');
|
|
wait_equals
|
|
-------------
|
|
t
|
|
(1 row)
|
|
|
|
SELECT _timescaledb_internal.restart_background_workers();
|
|
restart_background_workers
|
|
----------------------------
|
|
t
|
|
(1 row)
|
|
|
|
SELECT wait_worker_counts(1,0,1,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
/*Make sure drop extension statement restarts the worker and on rollback it keeps running*/
|
|
/*Now let's restart the scheduler and make sure our backend_start changed */
|
|
SELECT backend_start as orig_backend_start
|
|
FROM pg_stat_activity
|
|
WHERE application_name = 'TimescaleDB Background Worker Scheduler'
|
|
AND datname = 'single_2' \gset
|
|
BEGIN;
|
|
DROP EXTENSION timescaledb;
|
|
SELECT wait_worker_counts(1,0,1,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
ROLLBACK;
|
|
/*
 * Poll pg_stat_activity until the single_2 scheduler reports a backend_start
 * later than the given timestamp.
 *
 *   $1 - the backend_start recorded before the expected restart
 *
 * Returns TRUE as soon as a scheduler row with a later backend_start is seen,
 * FALSE after 10 unsuccessful polls spaced 0.1 seconds apart.
 */
CREATE FUNCTION wait_greater(TIMESTAMPTZ) RETURNS BOOLEAN LANGUAGE PLPGSQL AS
$BODY$
DECLARE
    r BOOLEAN;
BEGIN
    FOR i IN 1..10
    LOOP
        SELECT (backend_start > $1::timestamptz) backend_start_changed
        FROM pg_stat_activity
        WHERE application_name = 'TimescaleDB Background Worker Scheduler'
        AND datname = 'single_2' INTO r;

        -- r is NULL when no scheduler row is visible. Only a definite TRUE
        -- counts as success; a NULL must not be mistaken for "changed",
        -- so the loop keeps polling in that case.
        IF r IS TRUE THEN
            RETURN TRUE;
        END IF;

        PERFORM pg_sleep(0.1);
        PERFORM pg_stat_clear_snapshot();
    END LOOP;

    RETURN FALSE;
END
$BODY$;
|
|
SELECT wait_greater(:'orig_backend_start');
|
|
wait_greater
|
|
--------------
|
|
t
|
|
(1 row)
|
|
|
|
/* Make sure canceling the launcher backend causes a restart of schedulers */
|
|
SELECT backend_start as orig_backend_start
|
|
FROM pg_stat_activity
|
|
WHERE application_name = 'TimescaleDB Background Worker Scheduler'
|
|
AND datname = 'single_2' \gset
|
|
/* Send a cancel to the launcher backend found in pg_stat_activity. When no
 * launcher row is visible the scalar subquery yields NULL and coalesce falls
 * back to the server-version check, which is true only below PG 10. */
SELECT coalesce(
|
|
(SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE application_name = 'TimescaleDB Background Worker Launcher'),
|
|
(SELECT current_setting('server_version_num')::int < 100000));
|
|
coalesce
|
|
----------
|
|
t
|
|
(1 row)
|
|
|
|
SELECT wait_worker_counts(1,0,1,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
SELECT ((current_setting('server_version_num')::int < 100000) OR wait_greater(:'orig_backend_start')) as wait_greater;
|
|
wait_greater
|
|
--------------
|
|
t
|
|
(1 row)
|
|
|
|
/* Make sure dropping the extension means that the scheduler is stopped*/
|
|
BEGIN;
|
|
DROP EXTENSION timescaledb;
|
|
COMMIT;
|
|
SELECT wait_worker_counts(1,0,0,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
/* Test that background workers are stopped with DROP OWNED */
|
|
ALTER ROLE :ROLE_DEFAULT_PERM_USER WITH SUPERUSER;
|
|
\c single_2 :ROLE_DEFAULT_PERM_USER
|
|
SET client_min_messages = ERROR;
|
|
CREATE EXTENSION timescaledb CASCADE;
|
|
RESET client_min_messages;
|
|
/* Make sure there is 1 launcher and 1 bgw in single_2 */
|
|
SELECT wait_worker_counts(launcher_ct=>1, scheduler1_ct=> 0, scheduler2_ct=>1, template1_ct=>0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
/* DROP OWNED BY a role that does not own the extension results in no change to worker counts */
|
|
DROP OWNED BY :ROLE_DEFAULT_PERM_USER_2;
|
|
SELECT wait_worker_counts(launcher_ct=>1, scheduler1_ct=> 0, scheduler2_ct=>1, template1_ct=>0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
/* drop of owner of extension results in extension drop and a stop to the bgw */
|
|
DROP OWNED BY :ROLE_DEFAULT_PERM_USER;
|
|
/* The worker in single_2 is dead. Note that 0s are respected */
|
|
SELECT wait_worker_counts(launcher_ct=>1, scheduler1_ct=>0, scheduler2_ct=>0, template1_ct=>0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
\c single_2 :ROLE_SUPERUSER
|
|
ALTER ROLE :ROLE_DEFAULT_PERM_USER WITH NOSUPERUSER;
|
|
/* Connect to the template1 database */
|
|
\c template1
|
|
\ir include/bgw_launcher_utils.sql
|
|
/*
|
|
* Note on testing: need a couple wrappers that pg_sleep in a loop to wait for changes
|
|
* to appear in pg_stat_activity.
|
|
* Further Note: on PG 9.6 the launcher doesn't actually show up in pg_stat_activity (background workers are only listed there from PG 10 on).
|
|
* we can still test its interactions with its children, but can't test some of the things specific to the launcher.
|
|
* So we've added some bits about the version number as needed.
|
|
*/
|
|
/* Single-row summary of pg_stat_activity: how many launcher backends exist and
 * how many scheduler backends are attached to each database these tests use. */
CREATE VIEW worker_counts AS
SELECT
    count(*) FILTER (WHERE application_name = 'TimescaleDB Background Worker Launcher') AS launcher,
    count(*) FILTER (WHERE datname = 'single' AND application_name = 'TimescaleDB Background Worker Scheduler') AS single_scheduler,
    count(*) FILTER (WHERE datname = 'single_2' AND application_name = 'TimescaleDB Background Worker Scheduler') AS single_2_scheduler,
    count(*) FILTER (WHERE datname = 'template1' AND application_name = 'TimescaleDB Background Worker Scheduler') AS template1_scheduler
FROM pg_stat_activity;
|
|
/*
 * Poll worker_counts until it reports exactly the requested number of launcher
 * and scheduler backends.
 *
 *   launcher_ct   - expected launcher count (not checked when server_version_num < 100000)
 *   scheduler1_ct - expected scheduler count for database 'single'
 *   scheduler2_ct - expected scheduler count for database 'single_2'
 *   template1_ct  - expected scheduler count for database 'template1'
 *
 * Returns TRUE as soon as the counts match, FALSE after 10 unsuccessful polls
 * spaced 0.1 seconds apart.
 */
CREATE FUNCTION wait_worker_counts(launcher_ct INTEGER, scheduler1_ct INTEGER, scheduler2_ct INTEGER, template1_ct INTEGER) RETURNS BOOLEAN LANGUAGE PLPGSQL AS
$BODY$
BEGIN
    FOR attempt IN 1..10
    LOOP
        IF EXISTS (
            SELECT 1
            FROM worker_counts
            WHERE (launcher = launcher_ct OR current_setting('server_version_num')::int < 100000)
            AND single_scheduler = scheduler1_ct
            AND single_2_scheduler = scheduler2_ct
            AND template1_scheduler = template1_ct
        ) THEN
            --We have the correct counts!
            RETURN TRUE;
        END IF;

        -- Not there yet: pause, then drop the cached stats snapshot so the
        -- next poll reads fresh pg_stat_activity contents.
        PERFORM pg_sleep(0.1);
        PERFORM pg_stat_clear_snapshot();
    END LOOP;

    RETURN FALSE;
END
$BODY$;
|
|
BEGIN;
|
|
/* Then create extension there in a txn and make sure we see a scheduler start */
|
|
SET client_min_messages = ERROR;
|
|
CREATE EXTENSION timescaledb CASCADE;
|
|
RESET client_min_messages;
|
|
SELECT wait_worker_counts(1,0,0,1);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
COMMIT;
|
|
/* Now that our transaction has ended, the scheduler should immediately exit because it's a template database.*/
|
|
SELECT wait_worker_counts(1,0,0,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
/* Clean up the template database, removing our test utilities etc */
|
|
\ir include/bgw_launcher_utils_cleanup.sql
|
|
DROP FUNCTION wait_worker_counts(integer, integer, integer, integer);
|
|
DROP VIEW worker_counts;
|
|
\c single_2
|
|
/* Now try creating a DB from a template with the extension already installed.
|
|
* Make sure we see a scheduler start. */
|
|
CREATE DATABASE single;
|
|
SELECT wait_worker_counts(1,1,0,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|
|
DROP DATABASE single;
|
|
/* Now make sure that there's no race between create database and create extension.
|
|
* Although to be honest, this race probably wouldn't manifest in this test. */
|
|
\c template1
|
|
DROP EXTENSION timescaledb;
|
|
\c single_2
|
|
CREATE DATABASE single;
|
|
\c single
|
|
SET client_min_messages = ERROR;
|
|
CREATE EXTENSION timescaledb;
|
|
RESET client_min_messages;
|
|
\c single_2
|
|
SELECT wait_worker_counts(1,1,0,0);
|
|
wait_worker_counts
|
|
--------------------
|
|
t
|
|
(1 row)
|
|
|