mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-15 01:53:41 +08:00
Define meaning of 0 retries for jobs as no retries
So far, we have not differentiated between 0 and -1 retries of a job. -1 was defined as an infinite number of retries. The behavior of 0 was not well defined and also handled as an infinite number of retries. This commit defines 0 as no retries. Fixes: #6691
This commit is contained in:
parent
e1676ceba7
commit
3ea1417440
2
.unreleased/pr_6698
Normal file
2
.unreleased/pr_6698
Normal file
@ -0,0 +1,2 @@
|
||||
Fixes: #6698 Define meaning of 0 retries for jobs as no retries
|
||||
Thanks: @bvanelli for reporting an issue with the jobs retry count
|
@ -151,3 +151,7 @@ UPDATE _timescaledb_catalog.continuous_aggs_bucket_function
|
||||
-- attributes. This is now replaced by proper NULL values. We use TRIM() to ensure we handle empty string well.
|
||||
UPDATE _timescaledb_catalog.continuous_aggs_bucket_function SET bucket_origin = NULL WHERE TRIM(bucket_origin) = '';
|
||||
UPDATE _timescaledb_catalog.continuous_aggs_bucket_function SET bucket_timezone = NULL WHERE TRIM(bucket_timezone) = '';
|
||||
|
||||
-- So far, there were no difference between 0 and -1 retries. Since now on, 0 means no retries. Updating the retry
|
||||
-- count of existing jobs to -1 to keep the current semantics.
|
||||
UPDATE _timescaledb_config.bgw_job SET max_retries = -1 WHERE max_retries = 0;
|
||||
|
@ -985,7 +985,7 @@ ts_bgw_job_check_max_retries(BgwJob *job)
|
||||
job_stat = ts_bgw_job_stat_find(job->fd.id);
|
||||
|
||||
/* stop to execute failing jobs after reached the "max_retries" option */
|
||||
if (job->fd.max_retries > 0 && job_stat->fd.consecutive_failures >= job->fd.max_retries)
|
||||
if (job->fd.max_retries >= 0 && job_stat->fd.consecutive_failures >= job->fd.max_retries)
|
||||
{
|
||||
ereport(WARNING,
|
||||
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
|
||||
|
@ -10,26 +10,26 @@
|
||||
#include <utils/builtins.h>
|
||||
|
||||
#include "compression_api.h"
|
||||
#include "errors.h"
|
||||
#include "hypertable.h"
|
||||
#include "hypertable_cache.h"
|
||||
#include "policy_utils.h"
|
||||
#include "utils.h"
|
||||
#include "guc.h"
|
||||
#include "jsonb_utils.h"
|
||||
#include "bgw/job.h"
|
||||
#include "bgw_policy/job.h"
|
||||
|
||||
#include "bgw_policy/continuous_aggregate_api.h"
|
||||
#include "bgw_policy/job_api.h"
|
||||
#include "bgw_policy/job.h"
|
||||
#include "bgw_policy/policies_v2.h"
|
||||
#include "bgw/job_stat.h"
|
||||
#include "bgw/job.h"
|
||||
#include "bgw/timer.h"
|
||||
#include "errors.h"
|
||||
#include "guc.h"
|
||||
#include "hypertable_cache.h"
|
||||
#include "hypertable.h"
|
||||
#include "jsonb_utils.h"
|
||||
#include "policy_utils.h"
|
||||
#include "utils.h"
|
||||
|
||||
/* Default max runtime is unlimited for compress chunks */
|
||||
#define DEFAULT_MAX_RUNTIME \
|
||||
DatumGetIntervalP(DirectFunctionCall3(interval_in, CStringGetDatum("0"), InvalidOid, -1))
|
||||
|
||||
/* Right now, there is an infinite number of retries for compress_chunks jobs */
|
||||
#define DEFAULT_MAX_RETRIES (-1)
|
||||
/* Default retry period for reorder_jobs is currently 1 hour */
|
||||
#define DEFAULT_RETRY_PERIOD \
|
||||
DatumGetIntervalP(DirectFunctionCall3(interval_in, CStringGetDatum("1 hour"), InvalidOid, -1))
|
||||
@ -350,7 +350,7 @@ policy_compression_add_internal(Oid user_rel_oid, Datum compress_after_datum,
|
||||
job_id = ts_bgw_job_insert_relation(&application_name,
|
||||
default_schedule_interval,
|
||||
DEFAULT_MAX_RUNTIME,
|
||||
DEFAULT_MAX_RETRIES,
|
||||
JOB_RETRY_UNLIMITED,
|
||||
DEFAULT_RETRY_PERIOD,
|
||||
&proc_schema,
|
||||
&proc_name,
|
||||
|
@ -13,28 +13,27 @@
|
||||
|
||||
#include <jsonb_utils.h>
|
||||
#include <utils/builtins.h>
|
||||
|
||||
#include "bgw_policy/continuous_aggregate_api.h"
|
||||
#include "bgw_policy/job_api.h"
|
||||
#include "bgw_policy/job.h"
|
||||
#include "bgw_policy/policies_v2.h"
|
||||
#include "bgw_policy/policy_utils.h"
|
||||
#include "bgw/job_stat.h"
|
||||
#include "bgw/job.h"
|
||||
#include "ts_catalog/continuous_agg.h"
|
||||
#include "bgw/timer.h"
|
||||
#include "continuous_aggs/materialize.h"
|
||||
#include "dimension.h"
|
||||
#include "hypertable_cache.h"
|
||||
#include "time_utils.h"
|
||||
#include "policy_utils.h"
|
||||
#include "guc.h"
|
||||
#include "bgw_policy/policies_v2.h"
|
||||
#include "bgw/job_stat.h"
|
||||
#include "bgw/timer.h"
|
||||
#include "hypertable_cache.h"
|
||||
#include "policy_utils.h"
|
||||
#include "time_utils.h"
|
||||
#include "ts_catalog/continuous_agg.h"
|
||||
|
||||
/* Default max runtime for a continuous aggregate jobs is unlimited for now */
|
||||
#define DEFAULT_MAX_RUNTIME \
|
||||
DatumGetIntervalP(DirectFunctionCall3(interval_in, CStringGetDatum("0"), InvalidOid, -1))
|
||||
|
||||
/* infinite number of retries for continuous aggregate jobs */
|
||||
#define DEFAULT_MAX_RETRIES (-1)
|
||||
|
||||
int32
|
||||
policy_continuous_aggregate_get_mat_hypertable_id(const Jsonb *config)
|
||||
{
|
||||
@ -630,7 +629,7 @@ policy_refresh_cagg_add_internal(Oid cagg_oid, Oid start_offset_type, NullableDa
|
||||
job_id = ts_bgw_job_insert_relation(&application_name,
|
||||
&refresh_interval,
|
||||
DEFAULT_MAX_RUNTIME,
|
||||
DEFAULT_MAX_RETRIES,
|
||||
JOB_RETRY_UNLIMITED,
|
||||
&refresh_interval,
|
||||
&proc_schema,
|
||||
&proc_name,
|
||||
|
@ -25,8 +25,6 @@
|
||||
/* Default max runtime for a custom job is unlimited for now */
|
||||
#define DEFAULT_MAX_RUNTIME 0
|
||||
|
||||
/* Right now, there is an infinite number of retries for custom jobs */
|
||||
#define DEFAULT_MAX_RETRIES (-1)
|
||||
/* Default retry period for reorder_jobs is currently 5 minutes */
|
||||
#define DEFAULT_RETRY_PERIOD (5 * USECS_PER_MINUTE)
|
||||
|
||||
@ -176,7 +174,7 @@ job_add(PG_FUNCTION_ARGS)
|
||||
job_id = ts_bgw_job_insert_relation(&application_name,
|
||||
schedule_interval,
|
||||
&max_runtime,
|
||||
DEFAULT_MAX_RETRIES,
|
||||
JOB_RETRY_UNLIMITED,
|
||||
&retry_period,
|
||||
&proc_schema,
|
||||
&proc_name,
|
||||
|
@ -7,6 +7,10 @@
|
||||
|
||||
#include <postgres.h>
|
||||
|
||||
/* Special values for the number of retries of a failed job */
|
||||
#define JOB_RETRY_UNLIMITED (-1)
|
||||
#define JOB_RETRY_NONE 0
|
||||
|
||||
extern Datum job_add(PG_FUNCTION_ARGS);
|
||||
extern Datum job_alter(PG_FUNCTION_ARGS);
|
||||
extern Datum job_delete(PG_FUNCTION_ARGS);
|
||||
|
@ -19,17 +19,17 @@
|
||||
#include <hypertable_cache.h>
|
||||
#include <jsonb_utils.h>
|
||||
|
||||
#include "bgw/job.h"
|
||||
#include "bgw_policy/job_api.h"
|
||||
#include "bgw_policy/job.h"
|
||||
#include "bgw_policy/reorder_api.h"
|
||||
#include "bgw/job_stat.h"
|
||||
#include "bgw/job.h"
|
||||
#include "bgw/timer.h"
|
||||
#include "errors.h"
|
||||
#include "guc.h"
|
||||
#include "hypertable.h"
|
||||
#include "reorder.h"
|
||||
#include "utils.h"
|
||||
#include "guc.h"
|
||||
#include "bgw/job_stat.h"
|
||||
#include "bgw/timer.h"
|
||||
|
||||
/*
|
||||
* Default scheduled interval for reorder jobs should be 1/2 of the default chunk length.
|
||||
* If no such length is specified for the hypertable, then
|
||||
@ -43,8 +43,7 @@
|
||||
/* Default max runtime for a reorder job is unlimited for now */
|
||||
#define DEFAULT_MAX_RUNTIME \
|
||||
DatumGetIntervalP(DirectFunctionCall3(interval_in, CStringGetDatum("0"), InvalidOid, -1))
|
||||
/* Right now, there is an infinite number of retries for reorder jobs */
|
||||
#define DEFAULT_MAX_RETRIES (-1)
|
||||
|
||||
/* Default retry period for reorder_jobs is currently 5 minutes */
|
||||
#define DEFAULT_RETRY_PERIOD \
|
||||
DatumGetIntervalP(DirectFunctionCall3(interval_in, CStringGetDatum("5 min"), InvalidOid, -1))
|
||||
@ -272,7 +271,7 @@ policy_reorder_add(PG_FUNCTION_ARGS)
|
||||
job_id = ts_bgw_job_insert_relation(&application_name,
|
||||
&schedule_interval,
|
||||
DEFAULT_MAX_RUNTIME,
|
||||
DEFAULT_MAX_RETRIES,
|
||||
JOB_RETRY_UNLIMITED,
|
||||
DEFAULT_RETRY_PERIOD,
|
||||
&proc_schema,
|
||||
&proc_name,
|
||||
|
@ -1646,7 +1646,95 @@ DETAIL: owner of job 1026
|
||||
DELETE FROM _timescaledb_config.bgw_job WHERE id = :job_id;
|
||||
-- This should succeed
|
||||
DROP USER renamed_user;
|
||||
--
|
||||
-- Test without retry
|
||||
--
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
TRUNCATE bgw_log;
|
||||
TRUNCATE _timescaledb_internal.bgw_job_stat;
|
||||
SELECT ts_bgw_params_reset_time();
|
||||
ts_bgw_params_reset_time
|
||||
--------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT ts_bgw_params_mock_wait_returns_immediately(:WAIT_ON_JOB);
|
||||
ts_bgw_params_mock_wait_returns_immediately
|
||||
---------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
DELETE FROM _timescaledb_config.bgw_job;
|
||||
INSERT INTO _timescaledb_config.bgw_job(application_name, schedule_interval, max_runtime, max_retries, retry_period, proc_schema, proc_name) VALUES('bgw_test_job_2_error', INTERVAL '5000ms', INTERVAL '20ms', 0, INTERVAL '20ms', 'public', 'bgw_test_job_2_error') RETURNING id;
|
||||
id
|
||||
------
|
||||
1027
|
||||
(1 row)
|
||||
|
||||
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
|
||||
-- Run the first time
|
||||
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
|
||||
ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish
|
||||
------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT job_id, last_run_success, total_runs, total_successes, total_failures, total_crashes FROM _timescaledb_internal.bgw_job_stat;
|
||||
job_id | last_run_success | total_runs | total_successes | total_failures | total_crashes
|
||||
--------+------------------+------------+-----------------+----------------+---------------
|
||||
1027 | f | 1 | 0 | 1 | 0
|
||||
(1 row)
|
||||
|
||||
SELECT * FROM sorted_bgw_log;
|
||||
msg_no | application_name | msg
|
||||
--------+----------------------+-----------------------------------------------------------
|
||||
0 | DB Scheduler | [TESTING] Registered new background worker
|
||||
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
|
||||
1 | bgw_test_job_2_error | job 1027 reached max_retries after 1 consecutive failures
|
||||
2 | bgw_test_job_2_error | job 1027 threw an error
|
||||
3 | bgw_test_job_2_error | Error job 2
|
||||
2 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
|
||||
(6 rows)
|
||||
|
||||
SELECT last_finish, last_successful_finish, last_run_success FROM _timescaledb_internal.bgw_job_stat;
|
||||
last_finish | last_successful_finish | last_run_success
|
||||
------------------------------+------------------------+------------------
|
||||
Fri Dec 31 16:00:00 1999 PST | -infinity | f
|
||||
(1 row)
|
||||
|
||||
-- Run the second time
|
||||
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(100, 50);
|
||||
ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish
|
||||
------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT job_id, last_run_success, total_runs, total_successes, total_failures, total_crashes FROM _timescaledb_internal.bgw_job_stat;
|
||||
job_id | last_run_success | total_runs | total_successes | total_failures | total_crashes
|
||||
--------+------------------+------------+-----------------+----------------+---------------
|
||||
1027 | f | 1 | 0 | 1 | 0
|
||||
(1 row)
|
||||
|
||||
SELECT * FROM sorted_bgw_log;
|
||||
msg_no | application_name | msg
|
||||
--------+----------------------+-----------------------------------------------------------
|
||||
0 | DB Scheduler | [TESTING] Registered new background worker
|
||||
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
|
||||
1 | bgw_test_job_2_error | job 1027 reached max_retries after 1 consecutive failures
|
||||
2 | bgw_test_job_2_error | job 1027 threw an error
|
||||
3 | bgw_test_job_2_error | Error job 2
|
||||
2 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
|
||||
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
|
||||
(7 rows)
|
||||
|
||||
SELECT last_finish, last_successful_finish, last_run_success FROM _timescaledb_internal.bgw_job_stat;
|
||||
last_finish | last_successful_finish | last_run_success
|
||||
------------------------------+------------------------+------------------
|
||||
Fri Dec 31 16:00:00 1999 PST | -infinity | f
|
||||
(1 row)
|
||||
|
||||
-- clean up jobs
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
SELECT _timescaledb_functions.stop_background_workers();
|
||||
stop_background_workers
|
||||
-------------------------
|
||||
|
@ -697,8 +697,31 @@ DELETE FROM _timescaledb_config.bgw_job WHERE id = :job_id;
|
||||
-- This should succeed
|
||||
DROP USER renamed_user;
|
||||
|
||||
|
||||
--
|
||||
-- Test without retry
|
||||
--
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
TRUNCATE bgw_log;
|
||||
TRUNCATE _timescaledb_internal.bgw_job_stat;
|
||||
SELECT ts_bgw_params_reset_time();
|
||||
SELECT ts_bgw_params_mock_wait_returns_immediately(:WAIT_ON_JOB);
|
||||
DELETE FROM _timescaledb_config.bgw_job;
|
||||
INSERT INTO _timescaledb_config.bgw_job(application_name, schedule_interval, max_runtime, max_retries, retry_period, proc_schema, proc_name) VALUES('bgw_test_job_2_error', INTERVAL '5000ms', INTERVAL '20ms', 0, INTERVAL '20ms', 'public', 'bgw_test_job_2_error') RETURNING id;
|
||||
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
|
||||
|
||||
-- Run the first time
|
||||
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25);
|
||||
SELECT job_id, last_run_success, total_runs, total_successes, total_failures, total_crashes FROM _timescaledb_internal.bgw_job_stat;
|
||||
SELECT * FROM sorted_bgw_log;
|
||||
SELECT last_finish, last_successful_finish, last_run_success FROM _timescaledb_internal.bgw_job_stat;
|
||||
|
||||
-- Run the second time
|
||||
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(100, 50);
|
||||
SELECT job_id, last_run_success, total_runs, total_successes, total_failures, total_crashes FROM _timescaledb_internal.bgw_job_stat;
|
||||
SELECT * FROM sorted_bgw_log;
|
||||
SELECT last_finish, last_successful_finish, last_run_success FROM _timescaledb_internal.bgw_job_stat;
|
||||
|
||||
-- clean up jobs
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
SELECT _timescaledb_functions.stop_background_workers();
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user