mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-16 02:23:49 +08:00
Fix wrong crash error message on job history
Currently, while a job is running, we set `pid = SchedulerPid`, `succeeded = false` and `execution_finish = NOW()`. This leads to confusion when querying either the `timescaledb_information.job_errors` or the `timescaledb_information.job_history` view, because both show `err_message = job crash detected, see server logs` for a job that is still running, which is wrong. Fixed by setting `succeeded = NULL` and `pid = NULL` when the scheduler launches the job; when the job worker starts, it sets `pid = MyProcPid` (the worker PID), meaning that the job has started but has not finished yet. At the end of the execution we set `succeeded = TRUE` or `FALSE` and `execution_finish = NOW()` to mark the end of the job execution. Also adjusted the views to expose this information properly.
This commit is contained in:
parent
6ce2fc0df4
commit
6063464f6d
@ -317,7 +317,7 @@ CREATE TABLE _timescaledb_internal.bgw_job_stat_history (
|
||||
pid INTEGER,
|
||||
execution_start TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
execution_finish TIMESTAMPTZ,
|
||||
succeeded boolean NOT NULL DEFAULT FALSE,
|
||||
succeeded boolean,
|
||||
data jsonb,
|
||||
-- table constraints
|
||||
CONSTRAINT bgw_job_stat_history_pkey PRIMARY KEY (id)
|
||||
|
@ -1 +1,3 @@
|
||||
|
||||
ALTER TABLE _timescaledb_internal.bgw_job_stat_history
|
||||
ALTER COLUMN succeeded DROP NOT NULL,
|
||||
ALTER COLUMN succeeded DROP DEFAULT;
|
||||
|
@ -0,0 +1,5 @@
|
||||
UPDATE _timescaledb_internal.bgw_job_stat_history SET succeeded = FALSE WHERE succeeded IS NULL;
|
||||
|
||||
ALTER TABLE _timescaledb_internal.bgw_job_stat_history
|
||||
ALTER COLUMN succeeded SET NOT NULL,
|
||||
ALTER COLUMN succeeded SET DEFAULT FALSE;
|
@ -280,33 +280,34 @@ ORDER BY hypertable_name,
|
||||
CREATE OR REPLACE VIEW timescaledb_information.job_errors
|
||||
WITH (security_barrier = true) AS
|
||||
SELECT
|
||||
job_id,
|
||||
data->'job'->>'proc_schema' as proc_schema,
|
||||
data->'job'->>'proc_name' as proc_name,
|
||||
pid,
|
||||
execution_start AS start_time,
|
||||
execution_finish AS finish_time,
|
||||
data->'error_data'->>'sqlerrcode' AS sqlerrcode,
|
||||
CASE WHEN data->'error_data'->>'message' IS NOT NULL THEN
|
||||
CASE WHEN data->'error_data'->>'detail' IS NOT NULL THEN
|
||||
CASE WHEN data->'error_data'->>'hint' IS NOT NULL THEN concat(data->'error_data'->>'message', '. ', data->'error_data'->>'detail', '. ', data->'error_data'->>'hint')
|
||||
ELSE concat(data->'error_data'->>'message', ' ', data->'error_data'->>'detail')
|
||||
END
|
||||
ELSE
|
||||
CASE WHEN data->'error_data'->>'hint' IS NOT NULL THEN concat(data->'error_data'->>'message', '. ', data->'error_data'->>'hint')
|
||||
ELSE data->'error_data'->>'message'
|
||||
END
|
||||
END
|
||||
ELSE
|
||||
h.job_id,
|
||||
h.data->'job'->>'proc_schema' as proc_schema,
|
||||
h.data->'job'->>'proc_name' as proc_name,
|
||||
h.pid,
|
||||
h.execution_start AS start_time,
|
||||
h.execution_finish AS finish_time,
|
||||
h.data->'error_data'->>'sqlerrcode' AS sqlerrcode,
|
||||
CASE
|
||||
WHEN h.succeeded IS NULL AND h.execution_finish IS NULL AND h.pid IS NULL THEN
|
||||
'job crash detected, see server logs'
|
||||
WHEN h.data->'error_data'->>'message' IS NOT NULL THEN
|
||||
CASE WHEN h.data->'error_data'->>'detail' IS NOT NULL THEN
|
||||
CASE WHEN h.data->'error_data'->>'hint' IS NOT NULL THEN concat(h.data->'error_data'->>'message', '. ', h.data->'error_data'->>'detail', '. ', h.data->'error_data'->>'hint')
|
||||
ELSE concat(h.data->'error_data'->>'message', ' ', h.data->'error_data'->>'detail')
|
||||
END
|
||||
AS err_message
|
||||
ELSE
|
||||
CASE WHEN h.data->'error_data'->>'hint' IS NOT NULL THEN concat(h.data->'error_data'->>'message', '. ', h.data->'error_data'->>'hint')
|
||||
ELSE h.data->'error_data'->>'message'
|
||||
END
|
||||
END
|
||||
END AS err_message
|
||||
FROM
|
||||
_timescaledb_internal.bgw_job_stat_history
|
||||
_timescaledb_internal.bgw_job_stat_history h
|
||||
LEFT JOIN
|
||||
_timescaledb_config.bgw_job ON (bgw_job.id = bgw_job_stat_history.job_id)
|
||||
_timescaledb_config.bgw_job j ON (j.id = h.job_id)
|
||||
WHERE
|
||||
succeeded IS FALSE
|
||||
h.succeeded IS FALSE
|
||||
OR h.succeeded IS NULL
|
||||
AND (pg_catalog.pg_has_role(current_user,
|
||||
(SELECT pg_catalog.pg_get_userbyid(datdba)
|
||||
FROM pg_catalog.pg_database
|
||||
@ -328,6 +329,8 @@ SELECT
|
||||
h.data->'job'->'config' AS config,
|
||||
h.data->'error_data'->>'sqlerrcode' AS sqlerrcode,
|
||||
CASE
|
||||
WHEN h.succeeded IS NULL AND h.execution_finish IS NULL AND h.pid IS NULL THEN
|
||||
'job crash detected, see server logs'
|
||||
WHEN h.succeeded IS FALSE AND h.data->'error_data'->>'message' IS NOT NULL THEN
|
||||
CASE WHEN h.data->'error_data'->>'detail' IS NOT NULL THEN
|
||||
CASE WHEN h.data->'error_data'->>'hint' IS NOT NULL THEN concat(h.data->'error_data'->>'message', '. ', h.data->'error_data'->>'detail', '. ', h.data->'error_data'->>'hint')
|
||||
@ -338,10 +341,6 @@ SELECT
|
||||
ELSE h.data->'error_data'->>'message'
|
||||
END
|
||||
END
|
||||
WHEN h.succeeded IS FALSE AND h.execution_finish IS NOT NULL THEN
|
||||
'job crash detected, see server logs'
|
||||
WHEN h.execution_finish IS NULL THEN
|
||||
E'job didn\'t finish yet'
|
||||
END AS err_message
|
||||
FROM
|
||||
_timescaledb_internal.bgw_job_stat_history h
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include <utils/timestamp.h>
|
||||
|
||||
#include "compat/compat.h"
|
||||
#include "bgw/job_stat_history.h"
|
||||
#include "bgw/scheduler.h"
|
||||
#include "bgw_policy/chunk_stats.h"
|
||||
#include "bgw_policy/policy.h"
|
||||
@ -1151,6 +1152,7 @@ ts_bgw_job_entrypoint(PG_FUNCTION_ARGS)
|
||||
INSTR_TIME_SET_CURRENT(start);
|
||||
|
||||
StartTransactionCommand();
|
||||
|
||||
/* Grab a session lock on the job row to prevent concurrent deletes. Lock is released
|
||||
* when the job process exits */
|
||||
job = ts_bgw_job_find_with_lock(params.job_id,
|
||||
@ -1159,14 +1161,16 @@ ts_bgw_job_entrypoint(PG_FUNCTION_ARGS)
|
||||
SESSION_LOCK,
|
||||
/* block */ true,
|
||||
&got_lock);
|
||||
CommitTransactionCommand();
|
||||
|
||||
if (job == NULL)
|
||||
/* If the job is not found, we can't proceed */
|
||||
elog(ERROR, "job %d not found when running the background worker", params.job_id);
|
||||
|
||||
/* get parameters from bgworker */
|
||||
job->job_history.id = params.job_history_id;
|
||||
job->job_history.execution_start = params.job_history_execution_start;
|
||||
ts_bgw_job_stat_history_update(JOB_STAT_HISTORY_UPDATE_PID, job, JOB_SUCCESS, NULL);
|
||||
|
||||
CommitTransactionCommand();
|
||||
|
||||
elog(DEBUG2, "job %d (%s) found", params.job_id, NameStr(job->fd.application_name));
|
||||
|
||||
|
@ -650,7 +650,7 @@ ts_bgw_job_stat_mark_start(BgwJob *job)
|
||||
job->job_history.execution_start = ts_timer_get_current_timestamp();
|
||||
job->job_history.id = INVALID_BGW_JOB_STAT_HISTORY_ID;
|
||||
|
||||
ts_bgw_job_stat_history_mark_start(job);
|
||||
ts_bgw_job_stat_history_update(JOB_STAT_HISTORY_UPDATE_START, job, JOB_SUCCESS, NULL);
|
||||
|
||||
pgstat_report_activity(STATE_IDLE, NULL);
|
||||
}
|
||||
@ -674,7 +674,7 @@ ts_bgw_job_stat_mark_end(BgwJob *job, JobResult result, Jsonb *edata)
|
||||
errmsg("unable to find job statistics for job %d", job->fd.id)));
|
||||
}
|
||||
|
||||
ts_bgw_job_stat_history_mark_end(job, result, edata);
|
||||
ts_bgw_job_stat_history_update(JOB_STAT_HISTORY_UPDATE_END, job, result, edata);
|
||||
|
||||
pgstat_report_activity(STATE_IDLE, NULL);
|
||||
}
|
||||
@ -693,7 +693,7 @@ ts_bgw_job_stat_mark_crash_reported(BgwJob *job, JobResult result)
|
||||
errmsg("unable to find job statistics for job %d", job->fd.id)));
|
||||
}
|
||||
|
||||
ts_bgw_job_stat_history_mark_end(job, result, NULL);
|
||||
ts_bgw_job_stat_history_update(JOB_STAT_HISTORY_UPDATE_END, job, result, NULL);
|
||||
|
||||
pgstat_report_activity(STATE_IDLE, NULL);
|
||||
}
|
||||
|
@ -18,8 +18,9 @@
|
||||
|
||||
typedef struct BgwJobStatHistoryContext
|
||||
{
|
||||
BgwJob *job;
|
||||
JobResult result;
|
||||
BgwJobStatHistoryUpdateType update_type;
|
||||
BgwJob *job;
|
||||
Jsonb *edata;
|
||||
} BgwJobStatHistoryContext;
|
||||
|
||||
@ -90,7 +91,7 @@ ts_bgw_job_stat_history_build_data_info(BgwJobStatHistoryContext *context)
|
||||
}
|
||||
|
||||
static void
|
||||
ts_bgw_job_stat_history_insert(BgwJobStatHistoryContext *context)
|
||||
bgw_job_stat_history_insert(BgwJobStatHistoryContext *context, bool track_only_errors)
|
||||
{
|
||||
Assert(context != NULL);
|
||||
|
||||
@ -101,16 +102,29 @@ ts_bgw_job_stat_history_insert(BgwJobStatHistoryContext *context)
|
||||
CatalogSecurityContext sec_ctx;
|
||||
|
||||
ts_datum_set_int32(Anum_bgw_job_stat_history_job_id, values, context->job->fd.id, false);
|
||||
ts_datum_set_int32(Anum_bgw_job_stat_history_pid, values, MyProcPid, false);
|
||||
ts_datum_set_timestamptz(Anum_bgw_job_stat_history_execution_start,
|
||||
values,
|
||||
context->job->job_history.execution_start,
|
||||
false);
|
||||
ts_datum_set_timestamptz(Anum_bgw_job_stat_history_execution_finish, values, 0, true);
|
||||
if (track_only_errors)
|
||||
{
|
||||
/* In case of logging only ERRORs */
|
||||
ts_datum_set_int32(Anum_bgw_job_stat_history_pid, values, MyProcPid, false);
|
||||
ts_datum_set_timestamptz(Anum_bgw_job_stat_history_execution_finish,
|
||||
values,
|
||||
ts_timer_get_current_timestamp(),
|
||||
false);
|
||||
ts_datum_set_bool(Anum_bgw_job_stat_history_succeeded, values, false, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* When tracking history first we INSERT the job without the FINISH execution timestamp,
|
||||
* PID and SUCCEED flag because it will be marked once the job finishes */
|
||||
ts_datum_set_int32(Anum_bgw_job_stat_history_pid, values, 0, true);
|
||||
ts_datum_set_timestamptz(Anum_bgw_job_stat_history_execution_finish, values, 0, true);
|
||||
ts_datum_set_bool(Anum_bgw_job_stat_history_succeeded, values, false, true);
|
||||
}
|
||||
|
||||
ts_datum_set_jsonb(Anum_bgw_job_stat_history_data,
|
||||
values,
|
||||
ts_bgw_job_stat_history_build_data_info(context));
|
||||
@ -131,18 +145,14 @@ ts_bgw_job_stat_history_insert(BgwJobStatHistoryContext *context)
|
||||
table_close(rel, NoLock);
|
||||
}
|
||||
|
||||
void
|
||||
ts_bgw_job_stat_history_mark_start(BgwJob *job)
|
||||
static void
|
||||
bgw_job_stat_history_mark_start(BgwJobStatHistoryContext *context)
|
||||
{
|
||||
/* Don't mark the start in case of the GUC be disabled */
|
||||
if (!ts_guc_enable_job_execution_logging)
|
||||
return;
|
||||
|
||||
BgwJobStatHistoryContext context = {
|
||||
.job = job,
|
||||
};
|
||||
|
||||
ts_bgw_job_stat_history_insert(&context);
|
||||
bgw_job_stat_history_insert(context, false);
|
||||
}
|
||||
|
||||
static bool
|
||||
@ -192,19 +202,29 @@ bgw_job_stat_history_scan_id(int64 bgw_job_history_id, tuple_found_func tuple_fo
|
||||
}
|
||||
|
||||
static ScanTupleResult
|
||||
bgw_job_stat_history_tuple_mark_end(TupleInfo *ti, void *const data)
|
||||
bgw_job_stat_history_tuple_update(TupleInfo *ti, void *const data)
|
||||
{
|
||||
bool should_free;
|
||||
HeapTuple tuple = ts_scanner_fetch_heap_tuple(ti, false, &should_free);
|
||||
BgwJobStatHistoryContext *context = (BgwJobStatHistoryContext *) data;
|
||||
Jsonb *job_history_data = NULL;
|
||||
|
||||
Datum values[Natts_bgw_job_stat_history] = { 0 };
|
||||
bool nulls[Natts_bgw_job_stat_history] = { 0 };
|
||||
bool doReplace[Natts_bgw_job_stat_history] = { 0 };
|
||||
|
||||
values[AttrNumberGetAttrOffset(Anum_bgw_job_stat_history_pid)] = Int32GetDatum(MyProcPid);
|
||||
switch (context->update_type)
|
||||
{
|
||||
case JOB_STAT_HISTORY_UPDATE_PID:
|
||||
{
|
||||
values[AttrNumberGetAttrOffset(Anum_bgw_job_stat_history_pid)] =
|
||||
Int32GetDatum(MyProcPid);
|
||||
doReplace[AttrNumberGetAttrOffset(Anum_bgw_job_stat_history_pid)] = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case JOB_STAT_HISTORY_UPDATE_END:
|
||||
{
|
||||
values[AttrNumberGetAttrOffset(Anum_bgw_job_stat_history_execution_finish)] =
|
||||
TimestampTzGetDatum(ts_timer_get_current_timestamp());
|
||||
doReplace[AttrNumberGetAttrOffset(Anum_bgw_job_stat_history_execution_finish)] = true;
|
||||
@ -213,7 +233,7 @@ bgw_job_stat_history_tuple_mark_end(TupleInfo *ti, void *const data)
|
||||
BoolGetDatum((context->result == JOB_SUCCESS));
|
||||
doReplace[AttrNumberGetAttrOffset(Anum_bgw_job_stat_history_succeeded)] = true;
|
||||
|
||||
Jsonb *job_history_data = ts_bgw_job_stat_history_build_data_info(context);
|
||||
job_history_data = ts_bgw_job_stat_history_build_data_info(context);
|
||||
|
||||
if (job_history_data != NULL)
|
||||
{
|
||||
@ -221,6 +241,13 @@ bgw_job_stat_history_tuple_mark_end(TupleInfo *ti, void *const data)
|
||||
JsonbPGetDatum(job_history_data);
|
||||
doReplace[AttrNumberGetAttrOffset(Anum_bgw_job_stat_history_data)] = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case JOB_STAT_HISTORY_UPDATE_START:
|
||||
pg_unreachable();
|
||||
break;
|
||||
}
|
||||
|
||||
HeapTuple new_tuple =
|
||||
heap_modify_tuple(tuple, ts_scanner_get_tupledesc(ti), values, nulls, doReplace);
|
||||
@ -235,44 +262,65 @@ bgw_job_stat_history_tuple_mark_end(TupleInfo *ti, void *const data)
|
||||
return SCAN_DONE;
|
||||
}
|
||||
|
||||
void
|
||||
ts_bgw_job_stat_history_mark_end(BgwJob *job, JobResult result, Jsonb *edata)
|
||||
static void
|
||||
bgw_job_stat_history_update(BgwJobStatHistoryContext *context)
|
||||
{
|
||||
/* Don't execute in case of the GUC is false and the job succeeded, because failures are always
|
||||
* logged
|
||||
*/
|
||||
if (!ts_guc_enable_job_execution_logging && result == JOB_SUCCESS)
|
||||
if (!ts_guc_enable_job_execution_logging && context->result == JOB_SUCCESS)
|
||||
return;
|
||||
|
||||
/* Re-read the job information because it can change during the execution by using the
|
||||
* `alter_job` API inside the function/procedure (i.e. job config) */
|
||||
BgwJob *new_job = ts_bgw_job_find(job->fd.id, CurrentMemoryContext, true);
|
||||
BgwJob *new_job = ts_bgw_job_find(context->job->fd.id, CurrentMemoryContext, true);
|
||||
|
||||
/* Set the job history information */
|
||||
new_job->job_history = job->job_history;
|
||||
new_job->job_history = context->job->job_history;
|
||||
|
||||
BgwJobStatHistoryContext context = {
|
||||
.job = new_job,
|
||||
.result = result,
|
||||
.edata = edata,
|
||||
};
|
||||
/* Use the newly loaded job in the current context to use this information to register the
|
||||
* execution history */
|
||||
context->job = new_job;
|
||||
|
||||
/* Failures are always logged so in case of the GUC is false and a failure happens then we need
|
||||
* to insert all the information in the job error history table */
|
||||
if (!ts_guc_enable_job_execution_logging && result != JOB_SUCCESS)
|
||||
if (!ts_guc_enable_job_execution_logging && context->result != JOB_SUCCESS)
|
||||
{
|
||||
ts_bgw_job_stat_history_insert(&context);
|
||||
bgw_job_stat_history_insert(context, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Mark the end of the previous inserted start execution */
|
||||
if (!bgw_job_stat_history_scan_id(new_job->job_history.id,
|
||||
bgw_job_stat_history_tuple_mark_end,
|
||||
bgw_job_stat_history_tuple_update,
|
||||
NULL,
|
||||
&context,
|
||||
context,
|
||||
RowExclusiveLock))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("unable to find job history " INT64_FORMAT, new_job->job_history.id)));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ts_bgw_job_stat_history_update(BgwJobStatHistoryUpdateType update_type, BgwJob *job,
|
||||
JobResult result, Jsonb *edata)
|
||||
{
|
||||
BgwJobStatHistoryContext context = {
|
||||
.result = result,
|
||||
.update_type = update_type,
|
||||
.job = job,
|
||||
.edata = edata,
|
||||
};
|
||||
|
||||
switch (update_type)
|
||||
{
|
||||
case JOB_STAT_HISTORY_UPDATE_START:
|
||||
bgw_job_stat_history_mark_start(&context);
|
||||
break;
|
||||
case JOB_STAT_HISTORY_UPDATE_END:
|
||||
case JOB_STAT_HISTORY_UPDATE_PID:
|
||||
bgw_job_stat_history_update(&context);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -11,5 +11,12 @@
|
||||
|
||||
#define INVALID_BGW_JOB_STAT_HISTORY_ID 0
|
||||
|
||||
extern void ts_bgw_job_stat_history_mark_start(BgwJob *job);
|
||||
extern void ts_bgw_job_stat_history_mark_end(BgwJob *job, JobResult result, Jsonb *edata);
|
||||
typedef enum BgwJobStatHistoryUpdateType
|
||||
{
|
||||
JOB_STAT_HISTORY_UPDATE_START,
|
||||
JOB_STAT_HISTORY_UPDATE_END,
|
||||
JOB_STAT_HISTORY_UPDATE_PID,
|
||||
} BgwJobStatHistoryUpdateType;
|
||||
|
||||
extern void ts_bgw_job_stat_history_update(BgwJobStatHistoryUpdateType update_type, BgwJob *job,
|
||||
JobResult result, Jsonb *edata);
|
||||
|
@ -330,16 +330,19 @@ ts_datum_set_text_from_cstring(const AttrNumber attno, NullableDatum *datums, co
|
||||
}
|
||||
|
||||
static inline void
|
||||
ts_datum_set_bool(const AttrNumber attno, NullableDatum *datums, const bool value)
|
||||
ts_datum_set_bool(const AttrNumber attno, NullableDatum *datums, const bool value,
|
||||
const bool isnull)
|
||||
{
|
||||
if (!isnull)
|
||||
datums[AttrNumberGetAttrOffset(attno)].value = BoolGetDatum(value);
|
||||
datums[AttrNumberGetAttrOffset(attno)].isnull = false;
|
||||
datums[AttrNumberGetAttrOffset(attno)].isnull = isnull;
|
||||
}
|
||||
|
||||
static inline void
|
||||
ts_datum_set_int32(const AttrNumber attno, NullableDatum *datums, const int32 value,
|
||||
const bool isnull)
|
||||
{
|
||||
if (!isnull)
|
||||
datums[AttrNumberGetAttrOffset(attno)].value = Int32GetDatum(value);
|
||||
datums[AttrNumberGetAttrOffset(attno)].isnull = isnull;
|
||||
}
|
||||
@ -348,6 +351,7 @@ static inline void
|
||||
ts_datum_set_int64(const AttrNumber attno, NullableDatum *datums, const int64 value,
|
||||
const bool isnull)
|
||||
{
|
||||
if (!isnull)
|
||||
datums[AttrNumberGetAttrOffset(attno)].value = Int64GetDatum(value);
|
||||
datums[AttrNumberGetAttrOffset(attno)].isnull = isnull;
|
||||
}
|
||||
@ -356,6 +360,7 @@ static inline void
|
||||
ts_datum_set_timestamptz(const AttrNumber attno, NullableDatum *datums, const TimestampTz value,
|
||||
const bool isnull)
|
||||
{
|
||||
if (!isnull)
|
||||
datums[AttrNumberGetAttrOffset(attno)].value = TimestampTzGetDatum(value);
|
||||
datums[AttrNumberGetAttrOffset(attno)].isnull = isnull;
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ create_cagg_validate_query_datum(TupleDesc tupdesc, const bool is_valid_query,
|
||||
|
||||
tupdesc = BlessTupleDesc(tupdesc);
|
||||
|
||||
ts_datum_set_bool(Anum_cagg_validate_query_valid, datums, is_valid_query);
|
||||
ts_datum_set_bool(Anum_cagg_validate_query_valid, datums, is_valid_query, false);
|
||||
ts_datum_set_text_from_cstring(Anum_cagg_validate_query_error_level,
|
||||
datums,
|
||||
edata->elevel > 0 ? error_severity(edata->elevel) : NULL);
|
||||
@ -861,7 +861,10 @@ create_cagg_get_bucket_function_datum(TupleDesc tupdesc, ContinuousAggsBucketFun
|
||||
ts_datum_set_text_from_cstring(Anum_cagg_bucket_function_timezone,
|
||||
datums,
|
||||
bf->bucket_time_timezone);
|
||||
ts_datum_set_bool(Anum_cagg_bucket_function_fixed_width, datums, bf->bucket_fixed_interval);
|
||||
ts_datum_set_bool(Anum_cagg_bucket_function_fixed_width,
|
||||
datums,
|
||||
bf->bucket_fixed_interval,
|
||||
false);
|
||||
|
||||
Assert(tupdesc->natts == Natts_cagg_validate_query);
|
||||
tuple = ts_heap_form_tuple(tupdesc, datums);
|
||||
|
@ -28,12 +28,6 @@ SELECT _timescaledb_functions.start_background_workers();
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT pg_sleep(6);
|
||||
pg_sleep
|
||||
----------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT add_job('custom_job_ok', schedule_interval => interval '1 hour', initial_start := now()) AS job_id_1 \gset
|
||||
SELECT add_job('custom_job_error', schedule_interval => interval '1 hour', initial_start := now()) AS job_id_2 \gset
|
||||
SELECT test.wait_for_job_to_run(:job_id_1, 1);
|
||||
@ -81,6 +75,8 @@ SELECT pg_reload_conf();
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- Reconnect to make sure the GUC is set
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
SELECT scheduled FROM alter_job(:job_id_1, next_start => now());
|
||||
scheduled
|
||||
-----------
|
||||
|
@ -25,6 +25,8 @@ SELECT pg_reload_conf();
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- Reconnect to make sure the GUC is set
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
-- test a concurrent update
|
||||
CREATE OR REPLACE PROCEDURE custom_proc1(jobid int, config jsonb) LANGUAGE PLPGSQL AS
|
||||
$$
|
||||
@ -102,6 +104,8 @@ SELECT pg_reload_conf();
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- Reconnect to make sure the GUC is set
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
-- test the retention job
|
||||
SELECT next_start FROM alter_job(3, next_start => '2060-01-01 00:00:00+00'::timestamptz);
|
||||
next_start
|
||||
@ -152,6 +156,34 @@ SELECT _timescaledb_functions.stop_background_workers();
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- Job didn't finish yet and Crash detected
|
||||
DELETE FROM _timescaledb_internal.bgw_job_stat_history;
|
||||
INSERT INTO _timescaledb_internal.bgw_job_stat_history(job_id, pid, succeeded, execution_start, execution_finish, data)
|
||||
VALUES (1, NULL, NULL, '2000-01-01 00:00:00+00'::timestamptz, NULL, '{}'), -- Crash server detected
|
||||
(2, 2222, false, '2000-01-01 00:00:00+00'::timestamptz, NULL, '{}'), -- Didn't finished yet
|
||||
(3, 3333, false, '2000-01-01 00:00:00+00'::timestamptz, '2000-01-01 01:00:00+00'::timestamptz, '{}'), -- Finish with ERROR
|
||||
(4, 4444, true, '2000-01-01 00:00:00+00'::timestamptz, '2000-01-01 01:00:00+00'::timestamptz, '{}'); -- Finish with SUCCESS
|
||||
SELECT job_id, pid, succeeded, start_time, finish_time, config, err_message
|
||||
FROM timescaledb_information.job_history
|
||||
ORDER BY job_id;
|
||||
job_id | pid | succeeded | start_time | finish_time | config | err_message
|
||||
--------+------+-----------+------------------------------+------------------------------+--------+-------------------------------------
|
||||
1 | | | Fri Dec 31 16:00:00 1999 PST | | | job crash detected, see server logs
|
||||
2 | 2222 | f | Fri Dec 31 16:00:00 1999 PST | | |
|
||||
3 | 3333 | f | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 17:00:00 1999 PST | |
|
||||
4 | 4444 | t | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 17:00:00 1999 PST | |
|
||||
(4 rows)
|
||||
|
||||
SELECT job_id, pid, start_time, finish_time, err_message
|
||||
FROM timescaledb_information.job_errors
|
||||
ORDER BY job_id;
|
||||
job_id | pid | start_time | finish_time | err_message
|
||||
--------+------+------------------------------+------------------------------+-------------------------------------
|
||||
1 | | Fri Dec 31 16:00:00 1999 PST | | job crash detected, see server logs
|
||||
2 | 2222 | Fri Dec 31 16:00:00 1999 PST | |
|
||||
3 | 3333 | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 17:00:00 1999 PST |
|
||||
(3 rows)
|
||||
|
||||
DELETE FROM _timescaledb_internal.bgw_job_stat;
|
||||
DELETE FROM _timescaledb_internal.bgw_job_stat_history;
|
||||
DELETE FROM _timescaledb_config.bgw_job CASCADE;
|
||||
@ -201,6 +233,12 @@ SELECT count(*) > 0 FROM timescaledb_information.job_history WHERE succeeded IS
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) > 0 FROM timescaledb_information.job_errors WHERE err_message ~ 'failed to start job';
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
\set VERBOSITY terse
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
SELECT _timescaledb_functions.stop_background_workers();
|
||||
|
@ -13,6 +13,7 @@ SELECT pg_reload_conf();
|
||||
t
|
||||
(1 row)
|
||||
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
SET ROLE :ROLE_DEFAULT_PERM_USER;
|
||||
CREATE OR REPLACE PROCEDURE job_fail(jobid int, config jsonb)
|
||||
AS $$
|
||||
@ -75,37 +76,48 @@ SELECT pg_sleep(6);
|
||||
\set finish '2000-01-01 00:00:10+00'
|
||||
INSERT INTO _timescaledb_internal.bgw_job_stat_history(job_id, pid, succeeded, execution_start, execution_finish, data) VALUES
|
||||
(11111, 12345, false, :'start'::timestamptz, :'finish'::timestamptz, '{"error_data": {"message": "not an error"}}'),
|
||||
(22222, 45678, false, :'start'::timestamptz, :'finish'::timestamptz, '{}');
|
||||
(22222, 45678, false, :'start'::timestamptz, NULL, '{}'), -- Started and didn't finished yet
|
||||
(33333, NULL, NULL, :'start'::timestamptz, NULL, NULL); -- Crash detected cause not assigned an PID
|
||||
-- We check the log as different users and should only see what we
|
||||
-- have permissions to see. We only bother about jobs at 1000 or
|
||||
-- larger since the standard jobs are flaky.
|
||||
SET ROLE :ROLE_DEFAULT_PERM_USER;
|
||||
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
|
||||
FROM timescaledb_information.job_errors WHERE job_id >= 1000;
|
||||
job_id | proc_schema | proc_name | sqlerrcode | err_message
|
||||
--------+-------------+-----------+------------+----------------------
|
||||
1000 | public | job_fail | P0001 | raising an exception
|
||||
(1 row)
|
||||
|
||||
SET ROLE :ROLE_DEFAULT_PERM_USER_2;
|
||||
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
|
||||
FROM timescaledb_information.job_errors WHERE job_id >= 1000;
|
||||
job_id | proc_schema | proc_name | sqlerrcode | err_message
|
||||
--------+-------------+--------------+------------+-----------------------------------------------------
|
||||
1002 | public | custom_proc2 | 40001 | could not serialize access due to concurrent update
|
||||
(1 row)
|
||||
|
||||
SET ROLE :ROLE_SUPERUSER;
|
||||
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
|
||||
FROM timescaledb_information.job_errors WHERE job_id >= 1000;
|
||||
FROM timescaledb_information.job_errors WHERE job_id >= 1000
|
||||
ORDER BY job_id;
|
||||
job_id | proc_schema | proc_name | sqlerrcode | err_message
|
||||
--------+-------------+--------------+------------+-----------------------------------------------------
|
||||
1000 | public | job_fail | P0001 | raising an exception
|
||||
1002 | public | custom_proc2 | 40001 | could not serialize access due to concurrent update
|
||||
11111 | | | | not an error
|
||||
22222 | | | | job crash detected, see server logs
|
||||
22222 | | | |
|
||||
(4 rows)
|
||||
|
||||
SET ROLE :ROLE_DEFAULT_PERM_USER_2;
|
||||
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
|
||||
FROM timescaledb_information.job_errors WHERE job_id >= 1000
|
||||
ORDER BY job_id;
|
||||
job_id | proc_schema | proc_name | sqlerrcode | err_message
|
||||
--------+-------------+--------------+------------+-----------------------------------------------------
|
||||
1000 | public | job_fail | P0001 | raising an exception
|
||||
1002 | public | custom_proc2 | 40001 | could not serialize access due to concurrent update
|
||||
11111 | | | | not an error
|
||||
22222 | | | |
|
||||
(4 rows)
|
||||
|
||||
SET ROLE :ROLE_SUPERUSER;
|
||||
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
|
||||
FROM timescaledb_information.job_errors WHERE job_id >= 1000
|
||||
ORDER BY job_id;
|
||||
job_id | proc_schema | proc_name | sqlerrcode | err_message
|
||||
--------+-------------+--------------+------------+-----------------------------------------------------
|
||||
1000 | public | job_fail | P0001 | raising an exception
|
||||
1002 | public | custom_proc2 | 40001 | could not serialize access due to concurrent update
|
||||
11111 | | | | not an error
|
||||
22222 | | | |
|
||||
33333 | | | | job crash detected, see server logs
|
||||
(5 rows)
|
||||
|
||||
SELECT delete_job(:custom_proc2_id);
|
||||
delete_job
|
||||
------------
|
||||
@ -124,6 +136,13 @@ SELECT delete_job(:job_fail_id);
|
||||
|
||||
(1 row)
|
||||
|
||||
ALTER SYSTEM RESET DEFAULT_TRANSACTION_ISOLATION;
|
||||
SELECT pg_reload_conf();
|
||||
pg_reload_conf
|
||||
----------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
SELECT _timescaledb_functions.stop_background_workers();
|
||||
stop_background_workers
|
||||
|
@ -23,7 +23,6 @@ SHOW timescaledb.enable_job_execution_logging;
|
||||
|
||||
-- Start Background Workers
|
||||
SELECT _timescaledb_functions.start_background_workers();
|
||||
SELECT pg_sleep(6);
|
||||
|
||||
SELECT add_job('custom_job_ok', schedule_interval => interval '1 hour', initial_start := now()) AS job_id_1 \gset
|
||||
SELECT add_job('custom_job_error', schedule_interval => interval '1 hour', initial_start := now()) AS job_id_2 \gset
|
||||
@ -45,6 +44,9 @@ ORDER BY job_id;
|
||||
ALTER SYSTEM SET timescaledb.enable_job_execution_logging TO ON;
|
||||
SELECT pg_reload_conf();
|
||||
|
||||
-- Reconnect to make sure the GUC is set
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
|
||||
SELECT scheduled FROM alter_job(:job_id_1, next_start => now());
|
||||
SELECT scheduled FROM alter_job(:job_id_2, next_start => now());
|
||||
|
||||
|
@ -27,6 +27,9 @@ insert into custom_log values (0, 0, 'msg0');
|
||||
ALTER SYSTEM SET DEFAULT_TRANSACTION_ISOLATION TO 'serializable';
|
||||
SELECT pg_reload_conf();
|
||||
|
||||
-- Reconnect to make sure the GUC is set
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
|
||||
-- test a concurrent update
|
||||
CREATE OR REPLACE PROCEDURE custom_proc1(jobid int, config jsonb) LANGUAGE PLPGSQL AS
|
||||
$$
|
||||
@ -65,6 +68,9 @@ from _timescaledb_internal.bgw_job_stat_history WHERE job_id >= 1000 and succeed
|
||||
ALTER SYSTEM RESET DEFAULT_TRANSACTION_ISOLATION;
|
||||
SELECT pg_reload_conf();
|
||||
|
||||
-- Reconnect to make sure the GUC is set
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
|
||||
-- test the retention job
|
||||
SELECT next_start FROM alter_job(3, next_start => '2060-01-01 00:00:00+00'::timestamptz);
|
||||
DELETE FROM _timescaledb_internal.bgw_job_stat_history;
|
||||
@ -90,6 +96,22 @@ WHERE succeeded IS FALSE;
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
SELECT _timescaledb_functions.stop_background_workers();
|
||||
|
||||
-- Job didn't finish yet and Crash detected
|
||||
DELETE FROM _timescaledb_internal.bgw_job_stat_history;
|
||||
INSERT INTO _timescaledb_internal.bgw_job_stat_history(job_id, pid, succeeded, execution_start, execution_finish, data)
|
||||
VALUES (1, NULL, NULL, '2000-01-01 00:00:00+00'::timestamptz, NULL, '{}'), -- Crash server detected
|
||||
(2, 2222, false, '2000-01-01 00:00:00+00'::timestamptz, NULL, '{}'), -- Didn't finished yet
|
||||
(3, 3333, false, '2000-01-01 00:00:00+00'::timestamptz, '2000-01-01 01:00:00+00'::timestamptz, '{}'), -- Finish with ERROR
|
||||
(4, 4444, true, '2000-01-01 00:00:00+00'::timestamptz, '2000-01-01 01:00:00+00'::timestamptz, '{}'); -- Finish with SUCCESS
|
||||
|
||||
SELECT job_id, pid, succeeded, start_time, finish_time, config, err_message
|
||||
FROM timescaledb_information.job_history
|
||||
ORDER BY job_id;
|
||||
|
||||
SELECT job_id, pid, start_time, finish_time, err_message
|
||||
FROM timescaledb_information.job_errors
|
||||
ORDER BY job_id;
|
||||
|
||||
DELETE FROM _timescaledb_internal.bgw_job_stat;
|
||||
DELETE FROM _timescaledb_internal.bgw_job_stat_history;
|
||||
DELETE FROM _timescaledb_config.bgw_job CASCADE;
|
||||
@ -130,6 +152,7 @@ END;
|
||||
$TEST$;
|
||||
|
||||
SELECT count(*) > 0 FROM timescaledb_information.job_history WHERE succeeded IS FALSE AND err_message ~ 'failed to start job';
|
||||
SELECT count(*) > 0 FROM timescaledb_information.job_errors WHERE err_message ~ 'failed to start job';
|
||||
\set VERBOSITY terse
|
||||
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
|
@ -10,6 +10,7 @@ INSERT INTO my_table VALUES (0, 0);
|
||||
GRANT ALL ON my_table TO PUBLIC;
|
||||
ALTER SYSTEM SET DEFAULT_TRANSACTION_ISOLATION TO 'serializable';
|
||||
SELECT pg_reload_conf();
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
|
||||
SET ROLE :ROLE_DEFAULT_PERM_USER;
|
||||
|
||||
@ -61,26 +62,33 @@ SELECT pg_sleep(6);
|
||||
\set finish '2000-01-01 00:00:10+00'
|
||||
INSERT INTO _timescaledb_internal.bgw_job_stat_history(job_id, pid, succeeded, execution_start, execution_finish, data) VALUES
|
||||
(11111, 12345, false, :'start'::timestamptz, :'finish'::timestamptz, '{"error_data": {"message": "not an error"}}'),
|
||||
(22222, 45678, false, :'start'::timestamptz, :'finish'::timestamptz, '{}');
|
||||
(22222, 45678, false, :'start'::timestamptz, NULL, '{}'), -- Started and didn't finished yet
|
||||
(33333, NULL, NULL, :'start'::timestamptz, NULL, NULL); -- Crash detected cause not assigned an PID
|
||||
|
||||
-- We check the log as different users and should only see what we
|
||||
-- have permissions to see. We only bother about jobs at 1000 or
|
||||
-- larger since the standard jobs are flaky.
|
||||
SET ROLE :ROLE_DEFAULT_PERM_USER;
|
||||
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
|
||||
FROM timescaledb_information.job_errors WHERE job_id >= 1000;
|
||||
FROM timescaledb_information.job_errors WHERE job_id >= 1000
|
||||
ORDER BY job_id;
|
||||
|
||||
SET ROLE :ROLE_DEFAULT_PERM_USER_2;
|
||||
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
|
||||
FROM timescaledb_information.job_errors WHERE job_id >= 1000;
|
||||
FROM timescaledb_information.job_errors WHERE job_id >= 1000
|
||||
ORDER BY job_id;
|
||||
|
||||
SET ROLE :ROLE_SUPERUSER;
|
||||
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
|
||||
FROM timescaledb_information.job_errors WHERE job_id >= 1000;
|
||||
FROM timescaledb_information.job_errors WHERE job_id >= 1000
|
||||
ORDER BY job_id;
|
||||
|
||||
SELECT delete_job(:custom_proc2_id);
|
||||
SELECT delete_job(:custom_proc1_id);
|
||||
SELECT delete_job(:job_fail_id);
|
||||
|
||||
ALTER SYSTEM RESET DEFAULT_TRANSACTION_ISOLATION;
|
||||
SELECT pg_reload_conf();
|
||||
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
SELECT _timescaledb_functions.stop_background_workers();
|
||||
|
Loading…
x
Reference in New Issue
Block a user