Don't reuse job id

We shouldnt reuse job ids to make it easy to recognize the job
log entries for a job. We also need to keep the old job around
to not break loading dumps from older versions.
This commit is contained in:
Sven Klemm 2024-05-02 16:03:23 +02:00 committed by Sven Klemm
parent fc46eabf99
commit e298ecd532
16 changed files with 122 additions and 73 deletions

View File

@ -37,6 +37,9 @@ BEGIN
END; END;
$BODY$ SET search_path TO pg_catalog, pg_temp; $BODY$ SET search_path TO pg_catalog, pg_temp;
-- this is the previous job that was created for the same purpose
-- which has scheduled set to false. We need to keep it around to
-- not break loading dumps from older versions.
INSERT INTO _timescaledb_config.bgw_job ( INSERT INTO _timescaledb_config.bgw_job (
id, id,
application_name, application_name,
@ -57,7 +60,44 @@ INSERT INTO _timescaledb_config.bgw_job (
VALUES VALUES
( (
2, 2,
'Job History Log Retention Policy [2]', 'Error Log Retention Policy [2]',
INTERVAL '1 month',
INTERVAL '1 hour',
-1,
INTERVAL '1h',
'_timescaledb_functions',
'policy_job_error_retention',
pg_catalog.quote_ident(current_role)::regrole,
false,
'{"drop_after":"1 month"}',
'_timescaledb_functions',
'policy_job_error_retention_check',
false,
'2000-01-01 00:00:00+00'::timestamptz
) ON CONFLICT (id) DO NOTHING;
INSERT INTO _timescaledb_config.bgw_job (
id,
application_name,
schedule_interval,
max_runtime,
max_retries,
retry_period,
proc_schema,
proc_name,
owner,
scheduled,
config,
check_schema,
check_name,
fixed_schedule,
initial_start
)
VALUES
(
3,
'Job History Log Retention Policy [3]',
INTERVAL '1 month', INTERVAL '1 month',
INTERVAL '1 hour', INTERVAL '1 hour',
-1, -1,

View File

@ -373,16 +373,42 @@ ALTER EXTENSION timescaledb
DROP TABLE _timescaledb_internal.job_errors; DROP TABLE _timescaledb_internal.job_errors;
UPDATE UPDATE _timescaledb_config.bgw_job SET scheduled = false WHERE id = 2;
_timescaledb_config.bgw_job INSERT INTO _timescaledb_config.bgw_job (
SET id,
application_name = 'Job History Log Retention Policy [2]', application_name,
proc_schema = '_timescaledb_functions', schedule_interval,
proc_name = 'policy_job_stat_history_retention', max_runtime,
check_schema = '_timescaledb_functions', max_retries,
check_name = 'policy_job_stat_history_retention_check' retry_period,
WHERE proc_schema,
id = 2; proc_name,
owner,
scheduled,
config,
check_schema,
check_name,
fixed_schedule,
initial_start
)
VALUES
(
3,
'Job History Log Retention Policy [3]',
INTERVAL '1 month',
INTERVAL '1 hour',
-1,
INTERVAL '1h',
'_timescaledb_functions',
'policy_job_stat_history_retention',
pg_catalog.quote_ident(current_role)::regrole,
true,
'{"drop_after":"1 month"}',
'_timescaledb_functions',
'policy_job_stat_history_retention_check',
true,
'2000-01-01 00:00:00+00'::timestamptz
) ON CONFLICT (id) DO NOTHING;
DROP FUNCTION IF EXISTS _timescaledb_internal.policy_job_error_retention(job_id integer,config jsonb); DROP FUNCTION IF EXISTS _timescaledb_internal.policy_job_error_retention(job_id integer,config jsonb);
DROP FUNCTION IF EXISTS _timescaledb_internal.policy_job_error_retention_check(config jsonb); DROP FUNCTION IF EXISTS _timescaledb_internal.policy_job_error_retention_check(config jsonb);

View File

@ -284,16 +284,7 @@ BEGIN
END; END;
$BODY$ SET search_path TO pg_catalog, pg_temp; $BODY$ SET search_path TO pg_catalog, pg_temp;
UPDATE UPDATE _timescaledb_config.bgw_job SET scheduled = true WHERE id = 2;
_timescaledb_config.bgw_job DELETE FROM _timescaledb_config.bgw_job WHERE id = 3;
SET
application_name = 'Error Log Retention Policy [2]',
proc_schema = '_timescaledb_functions',
proc_name = 'policy_job_error_retention',
check_schema = '_timescaledb_functions',
check_name = 'policy_job_error_retention_check'
WHERE
id = 2;
DROP PROCEDURE IF EXISTS _timescaledb_functions.cagg_migrate_to_time_bucket(cagg REGCLASS); DROP PROCEDURE IF EXISTS _timescaledb_functions.cagg_migrate_to_time_bucket(cagg REGCLASS);

View File

@ -74,9 +74,8 @@ SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb
1004 1004
(1 row) (1 row)
-- exclude the telemetry[1] and job error retention[2] jobs -- exclude internal jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name
--------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------ --------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------
1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | | 1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | |

View File

@ -74,9 +74,8 @@ SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb
1004 1004
(1 row) (1 row)
-- exclude the telemetry[1] and job error retention[2] jobs -- exclude internal jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name
--------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------ --------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------
1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | | 1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | |

View File

@ -74,9 +74,8 @@ SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb
1004 1004
(1 row) (1 row)
-- exclude the telemetry[1] and job error retention[2] jobs -- exclude internal jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name
--------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------ --------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------
1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | | 1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | |

View File

@ -74,9 +74,8 @@ SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb
1004 1004
(1 row) (1 row)
-- exclude the telemetry[1] and job error retention[2] jobs -- exclude internal jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name
--------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------ --------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------
1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | | 1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | |

View File

@ -87,9 +87,9 @@ select pg_sleep(20);
(1 row) (1 row)
-- exclude the retention policy -- exclude internal jobs
select job_id, data->'job'->>'proc_name' as proc_name, data->'error_data'->>'message' as err_message, data->'error_data'->>'sqlerrcode' as sqlerrcode select job_id, data->'job'->>'proc_name' as proc_name, data->'error_data'->>'message' as err_message, data->'error_data'->>'sqlerrcode' as sqlerrcode
from _timescaledb_internal.bgw_job_stat_history WHERE job_id != 2 and succeeded is false; from _timescaledb_internal.bgw_job_stat_history WHERE job_id >= 1000 and succeeded is false;
job_id | proc_name | err_message | sqlerrcode job_id | proc_name | err_message | sqlerrcode
--------+--------------+-----------------------------------------------------+------------ --------+--------------+-----------------------------------------------------+------------
1000 | job_fail | raising an exception | P0001 1000 | job_fail | raising an exception | P0001
@ -104,7 +104,7 @@ SELECT pg_reload_conf();
(1 row) (1 row)
-- test the retention job -- test the retention job
SELECT next_start FROM alter_job(2, next_start => '2060-01-01 00:00:00+00'::timestamptz); SELECT next_start FROM alter_job(3, next_start => '2060-01-01 00:00:00+00'::timestamptz);
next_start next_start
------------------------------ ------------------------------
Wed Dec 31 16:00:00 2059 PST Wed Dec 31 16:00:00 2059 PST
@ -128,9 +128,9 @@ WHERE succeeded IS FALSE;
(3 rows) (3 rows)
-- drop all job_stats for the retention job -- drop all job_stats for the retention job
DELETE FROM _timescaledb_internal.bgw_job_stat WHERE job_id = 2; DELETE FROM _timescaledb_internal.bgw_job_stat WHERE job_id = 3;
SELECT next_start FROM alter_job(2, next_start => now() + interval '2 seconds') \gset SELECT next_start FROM alter_job(3, next_start => now() + interval '2 seconds') \gset
SELECT test.wait_for_job_to_run(2, 1); SELECT test.wait_for_job_to_run(3, 1);
wait_for_job_to_run wait_for_job_to_run
--------------------- ---------------------
t t

View File

@ -620,18 +620,16 @@ ERROR: must be owner of hypertable "test_table"
select add_retention_policy('test_strict', drop_after => NULL); select add_retention_policy('test_strict', drop_after => NULL);
ERROR: need to specify one of "drop_after" or "drop_created_before" ERROR: need to specify one of "drop_after" or "drop_created_before"
\set ON_ERROR_STOP 1 \set ON_ERROR_STOP 1
-- Check the number of non-telemetry policies. We check for telemetry -- Check the number of non-internal policies
-- policy in telemetry_community.sql
SELECT proc_name, count(*) SELECT proc_name, count(*)
FROM _timescaledb_config.bgw_job FROM _timescaledb_config.bgw_job
WHERE proc_name NOT LIKE '%telemetry%' WHERE id >= 1000
GROUP BY proc_name; GROUP BY proc_name;
proc_name | count proc_name | count
-----------------------------------+------- ------------------+-------
policy_reorder | 1 policy_reorder | 1
policy_job_stat_history_retention | 1 policy_retention | 2
policy_retention | 2 (2 rows)
(3 rows)
-- test retention with null arguments -- test retention with null arguments
select add_retention_policy(NULL, NULL); select add_retention_policy(NULL, NULL);

View File

@ -526,12 +526,12 @@ SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
"max_consecutive_failures": 1 + "max_consecutive_failures": 1 +
}, + }, +
"user_defined_action": { + "user_defined_action": { +
"total_runs": 2, + "total_runs": 3, +
"total_crashes": 0, + "total_crashes": 1, +
"total_duration": "@ 0", + "total_duration": "@ 0", +
"total_failures": 2, + "total_failures": 2, +
"total_successes": 0, + "total_successes": 0, +
"max_consecutive_crashes": 0, + "max_consecutive_crashes": 1, +
"total_duration_failures": "@ 4 secs",+ "total_duration_failures": "@ 4 secs",+
"max_consecutive_failures": 1 + "max_consecutive_failures": 1 +
}, + }, +

View File

@ -526,12 +526,12 @@ SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
"max_consecutive_failures": 1 + "max_consecutive_failures": 1 +
}, + }, +
"user_defined_action": { + "user_defined_action": { +
"total_runs": 2, + "total_runs": 3, +
"total_crashes": 0, + "total_crashes": 1, +
"total_duration": "@ 0", + "total_duration": "@ 0", +
"total_failures": 2, + "total_failures": 2, +
"total_successes": 0, + "total_successes": 0, +
"max_consecutive_crashes": 0, + "max_consecutive_crashes": 1, +
"total_duration_failures": "@ 4 secs",+ "total_duration_failures": "@ 4 secs",+
"max_consecutive_failures": 1 + "max_consecutive_failures": 1 +
}, + }, +

View File

@ -526,12 +526,12 @@ SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
"max_consecutive_failures": 1 + "max_consecutive_failures": 1 +
}, + }, +
"user_defined_action": { + "user_defined_action": { +
"total_runs": 2, + "total_runs": 3, +
"total_crashes": 0, + "total_crashes": 1, +
"total_duration": "@ 0", + "total_duration": "@ 0", +
"total_failures": 2, + "total_failures": 2, +
"total_successes": 0, + "total_successes": 0, +
"max_consecutive_crashes": 0, + "max_consecutive_crashes": 1, +
"total_duration_failures": "@ 4 secs",+ "total_duration_failures": "@ 4 secs",+
"max_consecutive_failures": 1 + "max_consecutive_failures": 1 +
}, + }, +

View File

@ -526,12 +526,12 @@ SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
"max_consecutive_failures": 1 + "max_consecutive_failures": 1 +
}, + }, +
"user_defined_action": { + "user_defined_action": { +
"total_runs": 2, + "total_runs": 3, +
"total_crashes": 0, + "total_crashes": 1, +
"total_duration": "@ 0", + "total_duration": "@ 0", +
"total_failures": 2, + "total_failures": 2, +
"total_successes": 0, + "total_successes": 0, +
"max_consecutive_crashes": 0, + "max_consecutive_crashes": 1, +
"total_duration_failures": "@ 4 secs",+ "total_duration_failures": "@ 4 secs",+
"max_consecutive_failures": 1 + "max_consecutive_failures": 1 +
}, + }, +

View File

@ -53,9 +53,8 @@ SELECT add_job('custom_proc2','1h', config:= '{"type":"procedure"}'::jsonb, init
SELECT add_job('custom_func', '1h', config:='{"type":"function"}'::jsonb, initial_start => :'time_zero'::TIMESTAMPTZ); SELECT add_job('custom_func', '1h', config:='{"type":"function"}'::jsonb, initial_start => :'time_zero'::TIMESTAMPTZ);
SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb, initial_start => :'time_zero'::TIMESTAMPTZ); SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb, initial_start => :'time_zero'::TIMESTAMPTZ);
-- exclude the telemetry[1] and job error retention[2] jobs -- exclude internal jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
SELECT count(*) FROM _timescaledb_config.bgw_job WHERE config->>'type' IN ('procedure', 'function'); SELECT count(*) FROM _timescaledb_config.bgw_job WHERE config->>'type' IN ('procedure', 'function');

View File

@ -59,15 +59,15 @@ from _timescaledb_internal.bgw_job_stat_history where job_id = :jobf_id and succ
select delete_job(:jobf_id); select delete_job(:jobf_id);
select pg_sleep(20); select pg_sleep(20);
-- exclude the retention policy -- exclude internal jobs
select job_id, data->'job'->>'proc_name' as proc_name, data->'error_data'->>'message' as err_message, data->'error_data'->>'sqlerrcode' as sqlerrcode select job_id, data->'job'->>'proc_name' as proc_name, data->'error_data'->>'message' as err_message, data->'error_data'->>'sqlerrcode' as sqlerrcode
from _timescaledb_internal.bgw_job_stat_history WHERE job_id != 2 and succeeded is false; from _timescaledb_internal.bgw_job_stat_history WHERE job_id >= 1000 and succeeded is false;
ALTER SYSTEM RESET DEFAULT_TRANSACTION_ISOLATION; ALTER SYSTEM RESET DEFAULT_TRANSACTION_ISOLATION;
SELECT pg_reload_conf(); SELECT pg_reload_conf();
-- test the retention job -- test the retention job
SELECT next_start FROM alter_job(2, next_start => '2060-01-01 00:00:00+00'::timestamptz); SELECT next_start FROM alter_job(3, next_start => '2060-01-01 00:00:00+00'::timestamptz);
TRUNCATE TABLE _timescaledb_internal.bgw_job_stat_history; TRUNCATE TABLE _timescaledb_internal.bgw_job_stat_history;
INSERT INTO _timescaledb_internal.bgw_job_stat_history(job_id, pid, succeeded, execution_start, execution_finish, data) INSERT INTO _timescaledb_internal.bgw_job_stat_history(job_id, pid, succeeded, execution_start, execution_finish, data)
VALUES (123, 12345, false, '2000-01-01 00:00:00+00'::timestamptz, '2000-01-01 00:00:10+00'::timestamptz, '{}'), VALUES (123, 12345, false, '2000-01-01 00:00:00+00'::timestamptz, '2000-01-01 00:00:10+00'::timestamptz, '{}'),
@ -79,9 +79,9 @@ SELECT job_id, pid, succeeded, execution_start, execution_finish, data
FROM _timescaledb_internal.bgw_job_stat_history FROM _timescaledb_internal.bgw_job_stat_history
WHERE succeeded IS FALSE; WHERE succeeded IS FALSE;
-- drop all job_stats for the retention job -- drop all job_stats for the retention job
DELETE FROM _timescaledb_internal.bgw_job_stat WHERE job_id = 2; DELETE FROM _timescaledb_internal.bgw_job_stat WHERE job_id = 3;
SELECT next_start FROM alter_job(2, next_start => now() + interval '2 seconds') \gset SELECT next_start FROM alter_job(3, next_start => now() + interval '2 seconds') \gset
SELECT test.wait_for_job_to_run(2, 1); SELECT test.wait_for_job_to_run(3, 1);
-- only the last row remains -- only the last row remains
SELECT job_id, pid, succeeded, execution_start, execution_finish, data SELECT job_id, pid, succeeded, execution_start, execution_finish, data
FROM _timescaledb_internal.bgw_job_stat_history FROM _timescaledb_internal.bgw_job_stat_history

View File

@ -326,11 +326,10 @@ select remove_retention_policy('test_table');
select add_retention_policy('test_strict', drop_after => NULL); select add_retention_policy('test_strict', drop_after => NULL);
\set ON_ERROR_STOP 1 \set ON_ERROR_STOP 1
-- Check the number of non-telemetry policies. We check for telemetry -- Check the number of non-internal policies
-- policy in telemetry_community.sql
SELECT proc_name, count(*) SELECT proc_name, count(*)
FROM _timescaledb_config.bgw_job FROM _timescaledb_config.bgw_job
WHERE proc_name NOT LIKE '%telemetry%' WHERE id >= 1000
GROUP BY proc_name; GROUP BY proc_name;