1
0
mirror of https://github.com/timescale/timescaledb.git synced 2025-05-16 10:33:27 +08:00

Don't reuse job id

We shouldnt reuse job ids to make it easy to recognize the job
log entries for a job. We also need to keep the old job around
to not break loading dumps from older versions.
This commit is contained in:
Sven Klemm 2024-05-02 16:03:23 +02:00 committed by Sven Klemm
parent fc46eabf99
commit e298ecd532
16 changed files with 122 additions and 73 deletions

@ -37,6 +37,9 @@ BEGIN
END;
$BODY$ SET search_path TO pg_catalog, pg_temp;
-- this is the previous job that was created for the same purpose
-- which has scheduled set to false. We need to keep it around to
-- not break loading dumps from older versions.
INSERT INTO _timescaledb_config.bgw_job (
id,
application_name,
@ -57,7 +60,44 @@ INSERT INTO _timescaledb_config.bgw_job (
VALUES
(
2,
'Job History Log Retention Policy [2]',
'Error Log Retention Policy [2]',
INTERVAL '1 month',
INTERVAL '1 hour',
-1,
INTERVAL '1h',
'_timescaledb_functions',
'policy_job_error_retention',
pg_catalog.quote_ident(current_role)::regrole,
false,
'{"drop_after":"1 month"}',
'_timescaledb_functions',
'policy_job_error_retention_check',
false,
'2000-01-01 00:00:00+00'::timestamptz
) ON CONFLICT (id) DO NOTHING;
INSERT INTO _timescaledb_config.bgw_job (
id,
application_name,
schedule_interval,
max_runtime,
max_retries,
retry_period,
proc_schema,
proc_name,
owner,
scheduled,
config,
check_schema,
check_name,
fixed_schedule,
initial_start
)
VALUES
(
3,
'Job History Log Retention Policy [3]',
INTERVAL '1 month',
INTERVAL '1 hour',
-1,

@ -373,16 +373,42 @@ ALTER EXTENSION timescaledb
DROP TABLE _timescaledb_internal.job_errors;
UPDATE
_timescaledb_config.bgw_job
SET
application_name = 'Job History Log Retention Policy [2]',
proc_schema = '_timescaledb_functions',
proc_name = 'policy_job_stat_history_retention',
check_schema = '_timescaledb_functions',
check_name = 'policy_job_stat_history_retention_check'
WHERE
id = 2;
UPDATE _timescaledb_config.bgw_job SET scheduled = false WHERE id = 2;
INSERT INTO _timescaledb_config.bgw_job (
id,
application_name,
schedule_interval,
max_runtime,
max_retries,
retry_period,
proc_schema,
proc_name,
owner,
scheduled,
config,
check_schema,
check_name,
fixed_schedule,
initial_start
)
VALUES
(
3,
'Job History Log Retention Policy [3]',
INTERVAL '1 month',
INTERVAL '1 hour',
-1,
INTERVAL '1h',
'_timescaledb_functions',
'policy_job_stat_history_retention',
pg_catalog.quote_ident(current_role)::regrole,
true,
'{"drop_after":"1 month"}',
'_timescaledb_functions',
'policy_job_stat_history_retention_check',
true,
'2000-01-01 00:00:00+00'::timestamptz
) ON CONFLICT (id) DO NOTHING;
DROP FUNCTION IF EXISTS _timescaledb_internal.policy_job_error_retention(job_id integer,config jsonb);
DROP FUNCTION IF EXISTS _timescaledb_internal.policy_job_error_retention_check(config jsonb);

@ -284,16 +284,7 @@ BEGIN
END;
$BODY$ SET search_path TO pg_catalog, pg_temp;
UPDATE
_timescaledb_config.bgw_job
SET
application_name = 'Error Log Retention Policy [2]',
proc_schema = '_timescaledb_functions',
proc_name = 'policy_job_error_retention',
check_schema = '_timescaledb_functions',
check_name = 'policy_job_error_retention_check'
WHERE
id = 2;
UPDATE _timescaledb_config.bgw_job SET scheduled = true WHERE id = 2;
DELETE FROM _timescaledb_config.bgw_job WHERE id = 3;
DROP PROCEDURE IF EXISTS _timescaledb_functions.cagg_migrate_to_time_bucket(cagg REGCLASS);

@ -74,9 +74,8 @@ SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb
1004
(1 row)
-- exclude the telemetry[1] and job error retention[2] jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
-- exclude internal jobs
SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name
--------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------
1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | |

@ -74,9 +74,8 @@ SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb
1004
(1 row)
-- exclude the telemetry[1] and job error retention[2] jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
-- exclude internal jobs
SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name
--------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------
1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | |

@ -74,9 +74,8 @@ SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb
1004
(1 row)
-- exclude the telemetry[1] and job error retention[2] jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
-- exclude internal jobs
SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name
--------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------
1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | |

@ -74,9 +74,8 @@ SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb
1004
(1 row)
-- exclude the telemetry[1] and job error retention[2] jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
-- exclude internal jobs
SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name
--------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------
1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | |

@ -87,9 +87,9 @@ select pg_sleep(20);
(1 row)
-- exclude the retention policy
-- exclude internal jobs
select job_id, data->'job'->>'proc_name' as proc_name, data->'error_data'->>'message' as err_message, data->'error_data'->>'sqlerrcode' as sqlerrcode
from _timescaledb_internal.bgw_job_stat_history WHERE job_id != 2 and succeeded is false;
from _timescaledb_internal.bgw_job_stat_history WHERE job_id >= 1000 and succeeded is false;
job_id | proc_name | err_message | sqlerrcode
--------+--------------+-----------------------------------------------------+------------
1000 | job_fail | raising an exception | P0001
@ -104,7 +104,7 @@ SELECT pg_reload_conf();
(1 row)
-- test the retention job
SELECT next_start FROM alter_job(2, next_start => '2060-01-01 00:00:00+00'::timestamptz);
SELECT next_start FROM alter_job(3, next_start => '2060-01-01 00:00:00+00'::timestamptz);
next_start
------------------------------
Wed Dec 31 16:00:00 2059 PST
@ -128,9 +128,9 @@ WHERE succeeded IS FALSE;
(3 rows)
-- drop all job_stats for the retention job
DELETE FROM _timescaledb_internal.bgw_job_stat WHERE job_id = 2;
SELECT next_start FROM alter_job(2, next_start => now() + interval '2 seconds') \gset
SELECT test.wait_for_job_to_run(2, 1);
DELETE FROM _timescaledb_internal.bgw_job_stat WHERE job_id = 3;
SELECT next_start FROM alter_job(3, next_start => now() + interval '2 seconds') \gset
SELECT test.wait_for_job_to_run(3, 1);
wait_for_job_to_run
---------------------
t

@ -620,18 +620,16 @@ ERROR: must be owner of hypertable "test_table"
select add_retention_policy('test_strict', drop_after => NULL);
ERROR: need to specify one of "drop_after" or "drop_created_before"
\set ON_ERROR_STOP 1
-- Check the number of non-telemetry policies. We check for telemetry
-- policy in telemetry_community.sql
-- Check the number of non-internal policies
SELECT proc_name, count(*)
FROM _timescaledb_config.bgw_job
WHERE proc_name NOT LIKE '%telemetry%'
WHERE id >= 1000
GROUP BY proc_name;
proc_name | count
-----------------------------------+-------
policy_reorder | 1
policy_job_stat_history_retention | 1
policy_retention | 2
(3 rows)
proc_name | count
------------------+-------
policy_reorder | 1
policy_retention | 2
(2 rows)
-- test retention with null arguments
select add_retention_policy(NULL, NULL);

@ -526,12 +526,12 @@ SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
"max_consecutive_failures": 1 +
}, +
"user_defined_action": { +
"total_runs": 2, +
"total_crashes": 0, +
"total_runs": 3, +
"total_crashes": 1, +
"total_duration": "@ 0", +
"total_failures": 2, +
"total_successes": 0, +
"max_consecutive_crashes": 0, +
"max_consecutive_crashes": 1, +
"total_duration_failures": "@ 4 secs",+
"max_consecutive_failures": 1 +
}, +

@ -526,12 +526,12 @@ SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
"max_consecutive_failures": 1 +
}, +
"user_defined_action": { +
"total_runs": 2, +
"total_crashes": 0, +
"total_runs": 3, +
"total_crashes": 1, +
"total_duration": "@ 0", +
"total_failures": 2, +
"total_successes": 0, +
"max_consecutive_crashes": 0, +
"max_consecutive_crashes": 1, +
"total_duration_failures": "@ 4 secs",+
"max_consecutive_failures": 1 +
}, +

@ -526,12 +526,12 @@ SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
"max_consecutive_failures": 1 +
}, +
"user_defined_action": { +
"total_runs": 2, +
"total_crashes": 0, +
"total_runs": 3, +
"total_crashes": 1, +
"total_duration": "@ 0", +
"total_failures": 2, +
"total_successes": 0, +
"max_consecutive_crashes": 0, +
"max_consecutive_crashes": 1, +
"total_duration_failures": "@ 4 secs",+
"max_consecutive_failures": 1 +
}, +

@ -526,12 +526,12 @@ SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
"max_consecutive_failures": 1 +
}, +
"user_defined_action": { +
"total_runs": 2, +
"total_crashes": 0, +
"total_runs": 3, +
"total_crashes": 1, +
"total_duration": "@ 0", +
"total_failures": 2, +
"total_successes": 0, +
"max_consecutive_crashes": 0, +
"max_consecutive_crashes": 1, +
"total_duration_failures": "@ 4 secs",+
"max_consecutive_failures": 1 +
}, +

@ -53,9 +53,8 @@ SELECT add_job('custom_proc2','1h', config:= '{"type":"procedure"}'::jsonb, init
SELECT add_job('custom_func', '1h', config:='{"type":"function"}'::jsonb, initial_start => :'time_zero'::TIMESTAMPTZ);
SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb, initial_start => :'time_zero'::TIMESTAMPTZ);
-- exclude the telemetry[1] and job error retention[2] jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
-- exclude internal jobs
SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
SELECT count(*) FROM _timescaledb_config.bgw_job WHERE config->>'type' IN ('procedure', 'function');

@ -59,15 +59,15 @@ from _timescaledb_internal.bgw_job_stat_history where job_id = :jobf_id and succ
select delete_job(:jobf_id);
select pg_sleep(20);
-- exclude the retention policy
-- exclude internal jobs
select job_id, data->'job'->>'proc_name' as proc_name, data->'error_data'->>'message' as err_message, data->'error_data'->>'sqlerrcode' as sqlerrcode
from _timescaledb_internal.bgw_job_stat_history WHERE job_id != 2 and succeeded is false;
from _timescaledb_internal.bgw_job_stat_history WHERE job_id >= 1000 and succeeded is false;
ALTER SYSTEM RESET DEFAULT_TRANSACTION_ISOLATION;
SELECT pg_reload_conf();
-- test the retention job
SELECT next_start FROM alter_job(2, next_start => '2060-01-01 00:00:00+00'::timestamptz);
SELECT next_start FROM alter_job(3, next_start => '2060-01-01 00:00:00+00'::timestamptz);
TRUNCATE TABLE _timescaledb_internal.bgw_job_stat_history;
INSERT INTO _timescaledb_internal.bgw_job_stat_history(job_id, pid, succeeded, execution_start, execution_finish, data)
VALUES (123, 12345, false, '2000-01-01 00:00:00+00'::timestamptz, '2000-01-01 00:00:10+00'::timestamptz, '{}'),
@ -79,9 +79,9 @@ SELECT job_id, pid, succeeded, execution_start, execution_finish, data
FROM _timescaledb_internal.bgw_job_stat_history
WHERE succeeded IS FALSE;
-- drop all job_stats for the retention job
DELETE FROM _timescaledb_internal.bgw_job_stat WHERE job_id = 2;
SELECT next_start FROM alter_job(2, next_start => now() + interval '2 seconds') \gset
SELECT test.wait_for_job_to_run(2, 1);
DELETE FROM _timescaledb_internal.bgw_job_stat WHERE job_id = 3;
SELECT next_start FROM alter_job(3, next_start => now() + interval '2 seconds') \gset
SELECT test.wait_for_job_to_run(3, 1);
-- only the last row remains
SELECT job_id, pid, succeeded, execution_start, execution_finish, data
FROM _timescaledb_internal.bgw_job_stat_history

@ -326,11 +326,10 @@ select remove_retention_policy('test_table');
select add_retention_policy('test_strict', drop_after => NULL);
\set ON_ERROR_STOP 1
-- Check the number of non-telemetry policies. We check for telemetry
-- policy in telemetry_community.sql
-- Check the number of non-internal policies
SELECT proc_name, count(*)
FROM _timescaledb_config.bgw_job
WHERE proc_name NOT LIKE '%telemetry%'
WHERE id >= 1000
GROUP BY proc_name;