From eedaaecc46966c75983f34e269fb28f7c8ae7277 Mon Sep 17 00:00:00 2001
From: Aleksander Alekseev
Date: Wed, 16 Feb 2022 16:38:24 +0300
Subject: [PATCH] Custom origin support in CAGGs

This patch allows using custom origins in CAGGs, for instance:

    time_bucket_ng('7 days', day, '2000-01-03' :: date) AS bucket

For weekly buckets this allows the user to choose what should be
considered the beginning of the week: Sunday or Monday. Also, by
shifting the origin one second forward or backward, the user can tweak
the inclusiveness of the buckets. This works for the date, timestamp
and timestamptz types. The buckets are treated as variable-sized in
all these cases. CAGGs on top of distributed hypertables, compressed
hypertables and compressed distributed hypertables are supported as
well.

Additionally, this patch does several small refactorings. Firstly, it
makes sure that the experimental CAGG features are tested in both
Debug and Release builds; this was previously overlooked. Secondly, it
renames the tests so that a person working on experimental CAGG
features can easily execute all the related tests:

    TESTS='exp_cagg_*' make installcheck

Last but not least, the patch refactors is_valid_bucketing_function()
and renames it to function_allowed_in_cagg_definition(). The reason
for doing this in the same patch is that otherwise the logic of that
function would become rather convoluted, which would complicate code
review.
---
 src/func_cache.c                              |   35 +
 src/func_cache.h                              |    1 +
 src/ts_catalog/catalog.h                      |    2 +-
 src/ts_catalog/continuous_agg.c               |   74 +-
 src/ts_catalog/continuous_agg.h               |    7 +-
 tsl/src/continuous_aggs/create.c              |  169 ++-
 ...{cagg_monthly.out => exp_cagg_monthly.out} |   10 +-
 ...experimental.out => exp_cagg_next_gen.out} |    0
 tsl/test/expected/exp_cagg_origin.out         | 1142 +++++++++++++++++
 ...ith_timezone.out => exp_cagg_timezone.out} |   10 +-
 tsl/test/sql/CMakeLists.txt                   |    7 +-
 ...{cagg_monthly.sql => exp_cagg_monthly.sql} |    0
 ...experimental.sql => exp_cagg_next_gen.sql} |    0
 tsl/test/sql/exp_cagg_origin.sql              |  688 ++++++++++
 ...ith_timezone.sql => exp_cagg_timezone.sql} |    0
 15 files changed, 2086 insertions(+), 59 deletions(-)
 rename tsl/test/expected/{cagg_monthly.out => exp_cagg_monthly.out} (98%)
 rename tsl/test/expected/{cagg_experimental.out => exp_cagg_next_gen.out} (100%)
 create mode 100644 tsl/test/expected/exp_cagg_origin.out
 rename tsl/test/expected/{cagg_with_timezone.out => exp_cagg_timezone.out} (98%)
 rename tsl/test/sql/{cagg_monthly.sql => exp_cagg_monthly.sql} (100%)
 rename tsl/test/sql/{cagg_experimental.sql => exp_cagg_next_gen.sql} (100%)
 create mode 100644 tsl/test/sql/exp_cagg_origin.sql
 rename tsl/test/sql/{cagg_with_timezone.sql => exp_cagg_timezone.sql} (100%)

diff --git a/src/func_cache.c b/src/func_cache.c
index d36ed42d5..4713873fc 100644
--- a/src/func_cache.c
+++ b/src/func_cache.c
@@ -178,6 +178,7 @@ static FuncInfo funcinfo[] = {
 	{
 		.origin = ORIGIN_TIMESCALE,
 		.is_bucketing_func = true,
+		.allowed_in_cagg_definition = true,
 		.funcname = "time_bucket",
 		.nargs = 2,
 		.arg_types = { INTERVALOID, TIMESTAMPOID },
@@ -187,6 +188,7 @@ static FuncInfo funcinfo[] = {
 	{
 		.origin = ORIGIN_TIMESCALE,
 		.is_bucketing_func = true,
+		.allowed_in_cagg_definition = false,
 		.funcname = "time_bucket",
 		.nargs = 3,
 		.arg_types = { INTERVALOID, TIMESTAMPOID, TIMESTAMPOID },
@@ -196,6 +198,7 @@ static FuncInfo funcinfo[] = {
 	{
 		.origin = ORIGIN_TIMESCALE,
 		.is_bucketing_func = true,
+		.allowed_in_cagg_definition = true,
 		.funcname = "time_bucket",
 		.nargs = 2,
 		.arg_types = { INTERVALOID, TIMESTAMPTZOID },
@@
-205,6 +208,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "time_bucket", .nargs = 3, .arg_types = { INTERVALOID, TIMESTAMPTZOID, TIMESTAMPTZOID }, @@ -214,6 +218,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = true, .funcname = "time_bucket", .nargs = 2, .arg_types = { INTERVALOID, DATEOID }, @@ -223,6 +228,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "time_bucket", .nargs = 3, .arg_types = { INTERVALOID, DATEOID, DATEOID }, @@ -232,6 +238,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = true, .funcname = "time_bucket", .nargs = 2, .arg_types = { INT2OID, INT2OID }, @@ -241,6 +248,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "time_bucket", .nargs = 3, .arg_types = { INT2OID, INT2OID, INT2OID }, @@ -250,6 +258,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = true, .funcname = "time_bucket", .nargs = 2, .arg_types = { INT4OID, INT4OID }, @@ -259,6 +268,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "time_bucket", .nargs = 3, .arg_types = { INT4OID, INT4OID, INT4OID }, @@ -268,6 +278,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = true, .funcname = "time_bucket", .nargs = 2, .arg_types = { INT8OID, INT8OID }, @@ -277,6 +288,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "time_bucket", .nargs = 3, .arg_types = { INT8OID, INT8OID, INT8OID }, @@ -286,6 +298,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE_EXPERIMENTAL, .is_bucketing_func = true, + .allowed_in_cagg_definition = true, .funcname = "time_bucket_ng", .nargs = 2, .arg_types = { INTERVALOID, DATEOID }, @@ -295,6 +308,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE_EXPERIMENTAL, .is_bucketing_func = true, + .allowed_in_cagg_definition = true, .funcname = "time_bucket_ng", .nargs = 3, .arg_types = { INTERVALOID, DATEOID, DATEOID }, @@ -304,6 +318,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE_EXPERIMENTAL, .is_bucketing_func = true, + .allowed_in_cagg_definition = true, .funcname = "time_bucket_ng", .nargs = 2, .arg_types = { INTERVALOID, TIMESTAMPOID }, @@ -313,6 +328,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE_EXPERIMENTAL, .is_bucketing_func = true, + .allowed_in_cagg_definition = true, .funcname = "time_bucket_ng", .nargs = 3, .arg_types = { INTERVALOID, TIMESTAMPOID, TIMESTAMPOID }, @@ -322,15 +338,27 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE_EXPERIMENTAL, .is_bucketing_func = true, + .allowed_in_cagg_definition = true, .funcname = "time_bucket_ng", .nargs = 3, .arg_types = { INTERVALOID, TIMESTAMPTZOID, TEXTOID }, .group_estimate = time_bucket_group_estimate, .sort_transform = time_bucket_sort_transform, }, + { + .origin = ORIGIN_TIMESCALE_EXPERIMENTAL, + .is_bucketing_func = true, + .allowed_in_cagg_definition = true, + .funcname = "time_bucket_ng", + .nargs 
= 4, + .arg_types = { INTERVALOID, TIMESTAMPTZOID, TIMESTAMPTZOID, TEXTOID }, + .group_estimate = time_bucket_group_estimate, + .sort_transform = time_bucket_sort_transform, + }, { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "time_bucket_gapfill", .nargs = 4, .arg_types = { INTERVALOID, TIMESTAMPOID, TIMESTAMPOID, TIMESTAMPOID }, @@ -340,6 +368,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "time_bucket_gapfill", .nargs = 4, .arg_types = { INTERVALOID, TIMESTAMPTZOID, TIMESTAMPTZOID, TIMESTAMPTZOID }, @@ -349,6 +378,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "time_bucket_gapfill", .nargs = 4, .arg_types = { INTERVALOID, DATEOID, DATEOID, DATEOID }, @@ -358,6 +388,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "time_bucket_gapfill", .nargs = 4, .arg_types = { INT2OID, INT2OID, INT2OID, INT2OID }, @@ -367,6 +398,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "time_bucket_gapfill", .nargs = 4, .arg_types = { INT4OID, INT4OID, INT4OID, INT4OID }, @@ -376,6 +408,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_TIMESCALE, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "time_bucket_gapfill", .nargs = 4, .arg_types = { INT8OID, INT8OID, INT8OID, INT8OID }, @@ -386,6 +419,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_POSTGRES, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "date_trunc", .nargs = 2, .arg_types = { TEXTOID, TIMESTAMPOID }, @@ -395,6 +429,7 @@ static FuncInfo funcinfo[] = { { .origin = ORIGIN_POSTGRES, .is_bucketing_func = true, + .allowed_in_cagg_definition = false, .funcname = "date_trunc", .nargs = 2, .arg_types = { TEXTOID, TIMESTAMPTZOID }, diff --git a/src/func_cache.h b/src/func_cache.h index d82affcd6..ac1893767 100644 --- a/src/func_cache.h +++ b/src/func_cache.h @@ -39,6 +39,7 @@ typedef struct FuncInfo const char *funcname; FuncOrigin origin; bool is_bucketing_func; + bool allowed_in_cagg_definition; int nargs; Oid arg_types[FUNC_CACHE_MAX_FUNC_ARGS]; group_estimate_func group_estimate; diff --git a/src/ts_catalog/catalog.h b/src/ts_catalog/catalog.h index 2ce65c525..a7f8b60f5 100644 --- a/src/ts_catalog/catalog.h +++ b/src/ts_catalog/catalog.h @@ -22,7 +22,7 @@ * The TimescaleDB catalog contains schema metadata for hypertables, among other * things. The metadata is stored in regular tables. This header file contains * definitions for those tables and should match any table declarations in - * sql/common/tables.sql. + * sql/pre_install/tables.sql. 
* * A source file that includes this header has access to a catalog object, * which contains cached information about catalog tables, such as relation diff --git a/src/ts_catalog/continuous_agg.c b/src/ts_catalog/continuous_agg.c index 26d990d4c..d9f141015 100644 --- a/src/ts_catalog/continuous_agg.c +++ b/src/ts_catalog/continuous_agg.c @@ -282,6 +282,7 @@ continuous_agg_fill_bucket_function(int32 mat_hypertable_id, ContinuousAggsBucke ts_scanner_foreach(&iterator) { const char *bucket_width_str; + const char *origin_str; Datum values[Natts_continuous_aggs_bucket_function]; bool isnull[Natts_continuous_aggs_bucket_function]; @@ -314,7 +315,14 @@ continuous_agg_fill_bucket_function(int32 mat_hypertable_id, ContinuousAggsBucke DirectFunctionCall3(interval_in, CStringGetDatum(bucket_width_str), InvalidOid, -1)); Assert(!isnull[Anum_continuous_aggs_bucket_function_origin - 1]); - bf->origin = TextDatumGetCString(values[Anum_continuous_aggs_bucket_function_origin - 1]); + origin_str = TextDatumGetCString(values[Anum_continuous_aggs_bucket_function_origin - 1]); + if (strlen(origin_str) == 0) + TIMESTAMP_NOBEGIN(bf->origin); + else + bf->origin = DatumGetTimestamp(DirectFunctionCall3(timestamp_in, + CStringGetDatum(origin_str), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1))); Assert(!isnull[Anum_continuous_aggs_bucket_function_timezone - 1]); bf->timezone = @@ -416,7 +424,8 @@ ts_continuous_agg_get_all_caggs_info(int32 raw_hypertable_id) static const char * bucket_function_serialize(const ContinuousAggsBucketFunction *bf) { - char *bucket_width_str; + const char *bucket_width_str; + const char *origin_str = ""; StringInfo str; if (NULL == bf) @@ -424,17 +433,23 @@ bucket_function_serialize(const ContinuousAggsBucketFunction *bf) str = makeStringInfo(); - /* We are pretty sure that user can't place ';' character in these fields */ - Assert(strstr(bf->origin, ";") == NULL); + /* We are pretty sure that user can't place ';' character in this field */ Assert(strstr(bf->timezone, ";") == NULL); bucket_width_str = DatumGetCString(DirectFunctionCall1(interval_out, IntervalPGetDatum(bf->bucket_width))); + + if (!TIMESTAMP_NOT_FINITE(bf->origin)) + { + origin_str = + DatumGetCString(DirectFunctionCall1(timestamp_out, TimestampGetDatum(bf->origin))); + } + appendStringInfo(str, "%d;%s;%s;%s;", BUCKET_FUNCTION_SERIALIZE_VERSION, bucket_width_str, - bf->origin, + origin_str, bf->timezone); return str->data; @@ -491,7 +506,15 @@ bucket_function_deserialize(const char *str) Assert(strlen(strings[1]) > 0); bf->bucket_width = DatumGetIntervalP( DirectFunctionCall3(interval_in, CStringGetDatum(strings[1]), InvalidOid, -1)); - bf->origin = strings[2]; + + if (strlen(strings[2]) == 0) + TIMESTAMP_NOBEGIN(bf->origin); + else + bf->origin = DatumGetTimestamp(DirectFunctionCall3(timestamp_in, + CStringGetDatum(strings[2]), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1))); + bf->timezone = strings[3]; return bf; } @@ -1510,15 +1533,40 @@ generic_time_bucket_ng(const ContinuousAggsBucketFunction *bf, Datum timestamp) if (strlen(bf->timezone) > 0) { - return DirectFunctionCall3(ts_time_bucket_ng_timezone, - IntervalPGetDatum(bf->bucket_width), - timestamp, - CStringGetTextDatum(bf->timezone)); + if (TIMESTAMP_NOT_FINITE(bf->origin)) + { + /* using default origin */ + return DirectFunctionCall3(ts_time_bucket_ng_timezone, + IntervalPGetDatum(bf->bucket_width), + timestamp, + CStringGetTextDatum(bf->timezone)); + } + else + { + /* custom origin specified */ + return 
DirectFunctionCall4(ts_time_bucket_ng_timezone_origin, + IntervalPGetDatum(bf->bucket_width), + timestamp, + TimestampTzGetDatum((TimestampTz) bf->origin), + CStringGetTextDatum(bf->timezone)); + } } - return DirectFunctionCall2(ts_time_bucket_ng_timestamp, - IntervalPGetDatum(bf->bucket_width), - timestamp); + if (TIMESTAMP_NOT_FINITE(bf->origin)) + { + /* using default origin */ + return DirectFunctionCall2(ts_time_bucket_ng_timestamp, + IntervalPGetDatum(bf->bucket_width), + timestamp); + } + else + { + /* custom origin specified */ + return DirectFunctionCall3(ts_time_bucket_ng_timestamp, + IntervalPGetDatum(bf->bucket_width), + timestamp, + TimestampGetDatum(bf->origin)); + } } /* diff --git a/src/ts_catalog/continuous_agg.h b/src/ts_catalog/continuous_agg.h index 09cc96841..936cf70ce 100644 --- a/src/ts_catalog/continuous_agg.h +++ b/src/ts_catalog/continuous_agg.h @@ -65,8 +65,11 @@ typedef struct ContinuousAggsBucketFunction char *name; /* `bucket_width` argument of the function */ Interval *bucket_width; - /* `origin` argument of the function provided by the user */ - char *origin; + /* + * Custom origin value stored as UTC timestamp. + * If not specified, stores infinity. + */ + Timestamp origin; /* `timezone` argument of the function provided by the user */ char *timezone; } ContinuousAggsBucketFunction; diff --git a/tsl/src/continuous_aggs/create.c b/tsl/src/continuous_aggs/create.c index 31b2a3d18..58c807ea9 100644 --- a/tsl/src/continuous_aggs/create.c +++ b/tsl/src/continuous_aggs/create.c @@ -182,6 +182,12 @@ typedef struct CAggTimebucketInfo variable-sized buckets */ Interval *interval; /* stores the interval, NULL if not specified */ const char *timezone; /* the name of the timezone, NULL if not specified */ + + /* + * Custom origin value stored as UTC timestamp. + * If not specified, stores infinity. + */ + Timestamp origin; } CAggTimebucketInfo; typedef struct AggPartCxt @@ -216,7 +222,7 @@ static void finalizequery_init(FinalizeQueryInfo *inp, Query *orig_query, MatTableColumnInfo *mattblinfo); static Query *finalizequery_get_select_query(FinalizeQueryInfo *inp, List *matcollist, ObjectAddress *mattbladdress); -static bool is_valid_bucketing_function(Oid funcid); +static bool function_allowed_in_cagg_definition(Oid funcid); static Const *cagg_boundary_make_lower_bound(Oid type); static Oid cagg_get_boundary_converter_funcoid(Oid typoid); @@ -673,9 +679,10 @@ caggtimebucketinfo_init(CAggTimebucketInfo *src, int32 hypertable_id, Oid hypert src->htpartcolno = hypertable_partition_colno; src->htpartcoltype = hypertable_partition_coltype; src->htpartcol_interval_len = hypertable_partition_col_interval; - src->bucket_width = 0; /* invalid value */ - src->interval = NULL; /* not specified by default */ - src->timezone = NULL; /* not specified by default */ + src->bucket_width = 0; /* invalid value */ + src->interval = NULL; /* not specified by default */ + src->timezone = NULL; /* not specified by default */ + TIMESTAMP_NOBEGIN(src->origin); /* origin is not specified by default */ } /* @@ -689,11 +696,10 @@ caggtimebucket_validate(CAggTimebucketInfo *tbinfo, List *groupClause, List *tar ListCell *l; bool found = false; - /* - * Make sure bucket_width was initialized by caggtimebucketinfo_init(). - * This assumption is used below. - */ + /* Make sure tbinfo was initialized. This assumption is used below. 
*/ Assert(tbinfo->bucket_width == 0); + Assert(tbinfo->timezone == NULL); + Assert(TIMESTAMP_NOT_FINITE(tbinfo->origin)); foreach (l, groupClause) { @@ -706,7 +712,7 @@ caggtimebucket_validate(CAggTimebucketInfo *tbinfo, List *groupClause, List *tar Node *col_arg; Node *tz_arg; - if (!is_valid_bucketing_function(fe->funcid)) + if (!function_allowed_in_cagg_definition(fe->funcid)) continue; if (found) @@ -726,25 +732,27 @@ caggtimebucket_validate(CAggTimebucketInfo *tbinfo, List *groupClause, List *tar errmsg( "time bucket function must reference a hypertable dimension column"))); - if (list_length(fe->args) == 3) + if (list_length(fe->args) == 4) { /* - * Currently the third argument of the bucketing function can be - * only a timezone. Only immutable expressions can be specified. + * Timezone and custom origin are specified. In this clause we + * save only the timezone. Origin is processed in the following + * clause. */ - tz_arg = eval_const_expressions(NULL, lthird(fe->args)); + tz_arg = eval_const_expressions(NULL, lfourth(fe->args)); + if (!IsA(tz_arg, Const)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("only immutable expressions allowed in time bucket function"), - errhint("Use an immutable expression as third argument" + errhint("Use an immutable expression as fourth argument" " to the time bucket function."))); Const *tz = castNode(Const, tz_arg); - /* Ensured by is_valid_bucketing_function() above */ + + /* This is assured by function_allowed_in_cagg_definition() above. */ Assert(tz->consttype == TEXTOID); const char *tz_name = TextDatumGetCString(tz->constvalue); - if (!ts_is_valid_timezone_name(tz_name)) { ereport(ERROR, @@ -756,6 +764,79 @@ caggtimebucket_validate(CAggTimebucketInfo *tbinfo, List *groupClause, List *tar tbinfo->bucket_width = BUCKET_WIDTH_VARIABLE; } + if (list_length(fe->args) >= 3) + { + tz_arg = eval_const_expressions(NULL, lthird(fe->args)); + if (!IsA(tz_arg, Const)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only immutable expressions allowed in time bucket function"), + errhint("Use an immutable expression as third argument" + " to the time bucket function."))); + + Const *tz = castNode(Const, tz_arg); + if ((tz->consttype == TEXTOID) && (list_length(fe->args) == 3)) + { + /* Timezone specified */ + const char *tz_name = TextDatumGetCString(tz->constvalue); + + if (!ts_is_valid_timezone_name(tz_name)) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid timezone name \"%s\"", tz_name))); + } + + tbinfo->timezone = tz_name; + tbinfo->bucket_width = BUCKET_WIDTH_VARIABLE; + } + else + { + /* + * Custom origin specified. This is always treated as + * a variable-sized bucket case. + */ + tbinfo->bucket_width = BUCKET_WIDTH_VARIABLE; + + if (tz->constisnull) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid origin value: null"))); + } + + switch (tz->consttype) + { + case DATEOID: + tbinfo->origin = DatumGetTimestamp( + DirectFunctionCall1(date_timestamp, tz->constvalue)); + break; + case TIMESTAMPOID: + tbinfo->origin = DatumGetTimestamp(tz->constvalue); + break; + case TIMESTAMPTZOID: + tbinfo->origin = DatumGetTimestampTz(tz->constvalue); + break; + default: + /* + * This shouldn't happen. But if somehow it does + * make sure the execution will stop here even in + * the Release build. 
+ */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("unsupported time bucket function"))); + } + + if (TIMESTAMP_NOT_FINITE(tbinfo->origin)) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid origin value: infinity"))); + } + } + } + /* * We constify width expression here so any immutable expression will be allowed * otherwise it would make it harder to create caggs for hypertables with e.g. int8 @@ -786,6 +867,37 @@ caggtimebucket_validate(CAggTimebucketInfo *tbinfo, List *groupClause, List *tar errmsg("only immutable expressions allowed in time bucket function"), errhint("Use an immutable expression as first argument" " to the time bucket function."))); + + if ((tbinfo->bucket_width == BUCKET_WIDTH_VARIABLE) && (tbinfo->interval->month != 0)) + { + /* Monthly buckets case */ + if (!TIMESTAMP_NOT_FINITE(tbinfo->origin)) + { + /* + * Origin was specified - make sure it's the first day of the month. + * If a timezone was specified the check should be done in this timezone. + */ + Timestamp origin = tbinfo->origin; + if (tbinfo->timezone != NULL) + { + /* The code is equal to 'timestamptz AT TIME ZONE tzname'. */ + origin = DatumGetTimestamp( + DirectFunctionCall2(timestamptz_zone, + CStringGetTextDatum(tbinfo->timezone), + TimestampTzGetDatum((TimestampTz) origin))); + } + + const char *day = + TextDatumGetCString(DirectFunctionCall2(timestamp_to_char, + TimestampGetDatum(origin), + CStringGetTextDatum("DD"))); + if (strcmp(day, "01") != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("for monthly buckets origin must be the first day of the " + "month"))); + } + } } } @@ -1310,23 +1422,13 @@ get_partialize_funcexpr(Aggref *agg) * for continuous aggregates. */ static bool -is_valid_bucketing_function(Oid funcid) +function_allowed_in_cagg_definition(Oid funcid) { - bool is_timescale, is_timezone; FuncInfo *finfo = ts_func_cache_get_bucketing_func(funcid); - if (finfo == NULL) - { return false; - } - is_timescale = - (finfo->origin == ORIGIN_TIMESCALE) || (finfo->origin == ORIGIN_TIMESCALE_EXPERIMENTAL); - - is_timezone = (finfo->nargs == 3) && (finfo->arg_types[0] == INTERVALOID) && - (finfo->arg_types[1] == TIMESTAMPTZOID) && (finfo->arg_types[2] == TEXTOID); - - return is_timescale && ((finfo->nargs == 2) || is_timezone); + return finfo->allowed_in_cagg_definition; } /*initialize MatTableColumnInfo */ @@ -1391,7 +1493,7 @@ mattablecolumninfo_addentry(MatTableColumnInfo *out, Node *input, int original_q bool timebkt_chk = false; if (IsA(tle->expr, FuncExpr)) - timebkt_chk = is_valid_bucketing_function(((FuncExpr *) tle->expr)->funcid); + timebkt_chk = function_allowed_in_cagg_definition(((FuncExpr *) tle->expr)->funcid); if (tle->resname) colname = pstrdup(tle->resname); @@ -1982,6 +2084,7 @@ cagg_create(const CreateTableAsStmt *create_stmt, ViewStmt *stmt, Query *panquer if (origquery_ht->bucket_width == BUCKET_WIDTH_VARIABLE) { const char *bucket_width; + const char *origin = ""; /* * Variable-sized buckets work only with intervals. 
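Aside, not part of the patch: the inclusiveness tweak mentioned in the commit message, which the origin validation above makes possible, can be sketched with plain time_bucket_ng calls. This sketch assumes the timescaledb_experimental schema and the three-argument (interval, timestamp, timestamp) variant registered in func_cache.c above:

    -- With a Monday origin, Monday midnight begins a new weekly bucket:
    SELECT timescaledb_experimental.time_bucket_ng(
        '7 days', timestamp '2021-06-14 00:00:00', timestamp '2000-01-03');
    -- expected: 2021-06-14 00:00:00
    -- Shifting the origin one second back moves every bucket boundary,
    -- so the same Monday midnight falls into the previous bucket:
    SELECT timescaledb_experimental.time_bucket_ng(
        '7 days', timestamp '2021-06-14 00:00:00', timestamp '2000-01-02 23:59:59');
    -- expected: 2021-06-13 23:59:59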
@@ -1990,6 +2093,12 @@ cagg_create(const CreateTableAsStmt *create_stmt, ViewStmt *stmt, Query *panquer bucket_width = DatumGetCString( DirectFunctionCall1(interval_out, IntervalPGetDatum(origquery_ht->interval))); + if (!TIMESTAMP_NOT_FINITE(origquery_ht->origin)) + { + origin = DatumGetCString( + DirectFunctionCall1(timestamp_out, TimestampGetDatum(origquery_ht->origin))); + } + /* * `experimental` = true and `name` = "time_bucket_ng" are hardcoded * rather than extracted from the query. We happen to know that @@ -2004,7 +2113,7 @@ cagg_create(const CreateTableAsStmt *create_stmt, ViewStmt *stmt, Query *panquer true, "time_bucket_ng", bucket_width, - "", + origin, origquery_ht->timezone); } diff --git a/tsl/test/expected/cagg_monthly.out b/tsl/test/expected/exp_cagg_monthly.out similarity index 98% rename from tsl/test/expected/cagg_monthly.out rename to tsl/test/expected/exp_cagg_monthly.out index 1482f8e6f..6aedd4476 100644 --- a/tsl/test/expected/cagg_monthly.out +++ b/tsl/test/expected/exp_cagg_monthly.out @@ -1033,11 +1033,11 @@ RESET timescaledb.materializations_per_refresh_window; \set DATA_NODE_3 :TEST_DBNAME _3 SELECT (add_data_node (name, host => 'localhost', DATABASE => name)).* FROM (VALUES (:'DATA_NODE_1'), (:'DATA_NODE_2'), (:'DATA_NODE_3')) v (name); - node_name | host | port | database | node_created | database_created | extension_created --------------------+-----------+-------+-------------------+--------------+------------------+------------------- - db_cagg_monthly_1 | localhost | 55432 | db_cagg_monthly_1 | t | t | t - db_cagg_monthly_2 | localhost | 55432 | db_cagg_monthly_2 | t | t | t - db_cagg_monthly_3 | localhost | 55432 | db_cagg_monthly_3 | t | t | t + node_name | host | port | database | node_created | database_created | extension_created +-----------------------+-----------+-------+-----------------------+--------------+------------------+------------------- + db_exp_cagg_monthly_1 | localhost | 55432 | db_exp_cagg_monthly_1 | t | t | t + db_exp_cagg_monthly_2 | localhost | 55432 | db_exp_cagg_monthly_2 | t | t | t + db_exp_cagg_monthly_3 | localhost | 55432 | db_exp_cagg_monthly_3 | t | t | t (3 rows) GRANT USAGE ON FOREIGN SERVER :DATA_NODE_1, :DATA_NODE_2, :DATA_NODE_3 TO PUBLIC; diff --git a/tsl/test/expected/cagg_experimental.out b/tsl/test/expected/exp_cagg_next_gen.out similarity index 100% rename from tsl/test/expected/cagg_experimental.out rename to tsl/test/expected/exp_cagg_next_gen.out diff --git a/tsl/test/expected/exp_cagg_origin.out b/tsl/test/expected/exp_cagg_origin.out new file mode 100644 index 000000000..764c9f2d5 --- /dev/null +++ b/tsl/test/expected/exp_cagg_origin.out @@ -0,0 +1,1142 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. 
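A note before the new expected file below: the tests pass an explicit origin such as '2000-01-03' (a Monday) because, per the time_bucket_ng documentation, its default origin is 2000-01-01, which was a Saturday. A minimal sketch of the difference (an illustration under that assumption, not part of the expected output):

    SELECT timescaledb_experimental.time_bucket_ng('7 days', date '2021-06-14');
    -- expected: 2021-06-12 (Saturday-based weeks, default origin 2000-01-01)
    SELECT timescaledb_experimental.time_bucket_ng('7 days', date '2021-06-14',
                                                   date '2000-01-03');
    -- expected: 2021-06-14 (Monday-based weeks)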
+CREATE TABLE conditions( + day DATE NOT NULL, + city text NOT NULL, + temperature INT NOT NULL); +SELECT create_hypertable( + 'conditions', 'day', + chunk_time_interval => INTERVAL '1 day' +); + create_hypertable +------------------------- + (1,public,conditions,t) +(1 row) + +INSERT INTO conditions (day, city, temperature) VALUES + ('2021-06-14', 'Moscow', 26), + ('2021-06-15', 'Moscow', 22), + ('2021-06-16', 'Moscow', 24), + ('2021-06-17', 'Moscow', 24), + ('2021-06-18', 'Moscow', 27), + ('2021-06-19', 'Moscow', 28), + ('2021-06-20', 'Moscow', 30), + ('2021-06-21', 'Moscow', 31), + ('2021-06-22', 'Moscow', 34), + ('2021-06-23', 'Moscow', 34), + ('2021-06-24', 'Moscow', 34), + ('2021-06-25', 'Moscow', 32), + ('2021-06-26', 'Moscow', 32), + ('2021-06-27', 'Moscow', 31); +\set ON_ERROR_STOP 0 +-- Make sure NULL can't be specified as an origin +CREATE MATERIALIZED VIEW conditions_summary_weekly +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('7 days', day, null) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket +WITH NO DATA; +ERROR: invalid origin value: null +-- Make sure 'infinity' can't be specified as an origin +CREATE MATERIALIZED VIEW conditions_summary_weekly +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('7 days', day, 'infinity' :: date) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket +WITH NO DATA; +ERROR: invalid origin value: infinity +-- For monthly buckets origin should be the first day of the month +CREATE MATERIALIZED VIEW conditions_summary_weekly +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month', day, '2021-06-03') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket +WITH NO DATA; +ERROR: for monthly buckets origin must be the first day of the month +-- Make sure buckets like '1 months 15 days" (fixed+variable-sized) are not allowed +CREATE MATERIALIZED VIEW conditions_summary_weekly +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month 15 days', day, '2021-06-01') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket +WITH NO DATA; +ERROR: invalid interval specified +\set ON_ERROR_STOP 1 +CREATE MATERIALIZED VIEW conditions_summary_weekly +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('7 days', day, '2000-01-03' :: date) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket +WITH NO DATA; +SELECT to_char(bucket, 'YYYY-MM-DD'), city, min, max +FROM conditions_summary_weekly +ORDER BY bucket; + to_char | city | min | max +---------+------+-----+----- +(0 rows) + +SELECT mat_hypertable_id AS cagg_id, raw_hypertable_id AS ht_id +FROM _timescaledb_catalog.continuous_agg +WHERE user_view_name = 'conditions_summary_weekly' +\gset +-- Make sure this is treated as a variable-sized bucket case +SELECT bucket_width +FROM _timescaledb_catalog.continuous_agg +WHERE mat_hypertable_id = :cagg_id; + bucket_width +-------------- + -1 +(1 row) + +-- Make sure the origin is saved in the catalog table +SELECT experimental, name, bucket_width, origin, timezone +FROM 
_timescaledb_catalog.continuous_aggs_bucket_function +WHERE mat_hypertable_id = :cagg_id; + experimental | name | bucket_width | origin | timezone +--------------+----------------+--------------+--------------------------+---------- + t | time_bucket_ng | @ 7 days | Mon Jan 03 00:00:00 2000 | +(1 row) + +-- Make sure truncating of the refresh window works +\set ON_ERROR_STOP 0 +CALL refresh_continuous_aggregate('conditions_summary_weekly', '2021-06-14', '2021-06-20'); +ERROR: refresh window too small +\set ON_ERROR_STOP 1 +-- Make sure refreshing works +CALL refresh_continuous_aggregate('conditions_summary_weekly', '2021-06-14', '2021-06-21'); +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS week, min, max +FROM conditions_summary_weekly +ORDER BY week, city; + city | week | min | max +--------+------------+-----+----- + Moscow | 2021-06-14 | 22 | 30 +(1 row) + +-- Check the invalidation threshold +SELECT _timescaledb_internal.to_timestamp(watermark) at time zone 'UTC' +FROM _timescaledb_catalog.continuous_aggs_invalidation_threshold +WHERE hypertable_id = :ht_id; + timezone +-------------------------- + Mon Jun 21 00:00:00 2021 +(1 row) + +-- Add some dummy data for two more weeks and call refresh (no invalidations test case) +INSERT INTO conditions (day, city, temperature) +SELECT ts :: date, city, row_number() OVER () +FROM generate_series('2021-06-28' :: date, '2021-07-11', '1 day') as ts, + unnest(array['Moscow', 'Berlin']) as city; +-- Double check generated data +SELECT to_char(day, 'YYYY-MM-DD'), city, temperature +FROM conditions +WHERE day >= '2021-06-28' +ORDER BY city DESC, day; + to_char | city | temperature +------------+--------+------------- + 2021-06-28 | Moscow | 1 + 2021-06-29 | Moscow | 2 + 2021-06-30 | Moscow | 3 + 2021-07-01 | Moscow | 4 + 2021-07-02 | Moscow | 5 + 2021-07-03 | Moscow | 6 + 2021-07-04 | Moscow | 7 + 2021-07-05 | Moscow | 8 + 2021-07-06 | Moscow | 9 + 2021-07-07 | Moscow | 10 + 2021-07-08 | Moscow | 11 + 2021-07-09 | Moscow | 12 + 2021-07-10 | Moscow | 13 + 2021-07-11 | Moscow | 14 + 2021-06-28 | Berlin | 15 + 2021-06-29 | Berlin | 16 + 2021-06-30 | Berlin | 17 + 2021-07-01 | Berlin | 18 + 2021-07-02 | Berlin | 19 + 2021-07-03 | Berlin | 20 + 2021-07-04 | Berlin | 21 + 2021-07-05 | Berlin | 22 + 2021-07-06 | Berlin | 23 + 2021-07-07 | Berlin | 24 + 2021-07-08 | Berlin | 25 + 2021-07-09 | Berlin | 26 + 2021-07-10 | Berlin | 27 + 2021-07-11 | Berlin | 28 +(28 rows) + +-- Make sure the invalidation threshold was unaffected +SELECT _timescaledb_internal.to_timestamp(watermark) at time zone 'UTC' +FROM _timescaledb_catalog.continuous_aggs_invalidation_threshold +WHERE hypertable_id = :ht_id; + timezone +-------------------------- + Mon Jun 21 00:00:00 2021 +(1 row) + +-- Make sure the invalidation log is empty +SELECT + _timescaledb_internal.to_timestamp(lowest_modified_value) AS lowest, + _timescaledb_internal.to_timestamp(greatest_modified_value) AS greatest +FROM _timescaledb_catalog.continuous_aggs_hypertable_invalidation_log +WHERE hypertable_id = :ht_id; + lowest | greatest +--------+---------- +(0 rows) + +-- Call refresh +CALL refresh_continuous_aggregate('conditions_summary_weekly', '2021-06-28', '2021-07-12'); +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS week, min, max +FROM conditions_summary_weekly +ORDER BY week, city; + city | week | min | max +--------+------------+-----+----- + Moscow | 2021-06-14 | 22 | 30 + Berlin | 2021-06-28 | 15 | 21 + Moscow | 2021-06-28 | 1 | 7 + Berlin | 2021-07-05 | 22 | 28 + Moscow | 2021-07-05 | 8 | 14 +(5 rows) + 
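Aside, not shown in the patch: the weekly aggregate above uses a Monday origin; a Sunday-based week only requires picking a Sunday origin such as '2000-01-02'. A sketch with a hypothetical view name, reusing the conditions table from this test:

    CREATE MATERIALIZED VIEW conditions_summary_weekly_sunday
    WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS
    SELECT city,
           timescaledb_experimental.time_bucket_ng('7 days', day, '2000-01-02' :: date) AS bucket,
           MIN(temperature),
           MAX(temperature)
    FROM conditions
    GROUP BY city, bucket
    WITH NO DATA;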
+-- Make sure the invalidation threshold has changed +SELECT _timescaledb_internal.to_timestamp(watermark) at time zone 'UTC' +FROM _timescaledb_catalog.continuous_aggs_invalidation_threshold +WHERE hypertable_id = :ht_id; + timezone +-------------------------- + Mon Jul 12 00:00:00 2021 +(1 row) + +-- Check if CREATE MATERIALIZED VIEW ... WITH DATA works. +-- Use monthly buckets this time and specify June 2000 as an origin. +CREATE MATERIALIZED VIEW conditions_summary_monthly +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month', day, '2000-06-01' :: date) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket; +NOTICE: refreshing continuous aggregate "conditions_summary_monthly" +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_monthly +ORDER BY month, city; + city | month | min | max +--------+------------+-----+----- + Berlin | 2021-06-01 | 15 | 17 + Moscow | 2021-06-01 | 1 | 34 + Berlin | 2021-07-01 | 18 | 28 + Moscow | 2021-07-01 | 4 | 14 +(4 rows) + +-- Check the invalidation. +-- Step 1/2. Insert some more data , do a refresh and make sure that the +-- invalidation log is empty. +INSERT INTO conditions (day, city, temperature) +SELECT ts :: date, city, row_number() OVER () +FROM generate_series('2021-09-01' :: date, '2021-09-15', '1 day') as ts, + unnest(array['Moscow', 'Berlin']) as city; +CALL refresh_continuous_aggregate('conditions_summary_monthly', '2021-09-01', '2021-10-01'); +SELECT + _timescaledb_internal.to_timestamp(lowest_modified_value) AS lowest, + _timescaledb_internal.to_timestamp(greatest_modified_value) AS greatest +FROM _timescaledb_catalog.continuous_aggs_hypertable_invalidation_log +WHERE hypertable_id = :ht_id; + lowest | greatest +--------+---------- +(0 rows) + +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_monthly +ORDER BY month, city; + city | month | min | max +--------+------------+-----+----- + Berlin | 2021-06-01 | 15 | 17 + Moscow | 2021-06-01 | 1 | 34 + Berlin | 2021-07-01 | 18 | 28 + Moscow | 2021-07-01 | 4 | 14 + Berlin | 2021-09-01 | 16 | 30 + Moscow | 2021-09-01 | 1 | 15 +(6 rows) + +-- Step 2/2. Add more data below the invalidation threshold, make sure that the +-- invalidation log is not empty, then do a refresh. 
+INSERT INTO conditions (day, city, temperature) +SELECT ts :: date, city, (CASE WHEN city = 'Moscow' THEN -40 ELSE 40 END) +FROM generate_series('2021-09-16' :: date, '2021-09-30', '1 day') as ts, + unnest(array['Moscow', 'Berlin']) as city; +SELECT + _timescaledb_internal.to_timestamp(lowest_modified_value) at time zone 'UTC' AS lowest, + _timescaledb_internal.to_timestamp(greatest_modified_value) at time zone 'UTC' AS greatest +FROM _timescaledb_catalog.continuous_aggs_hypertable_invalidation_log +WHERE hypertable_id = :ht_id; + lowest | greatest +--------------------------+-------------------------- + Thu Sep 16 00:00:00 2021 | Thu Sep 30 00:00:00 2021 +(1 row) + +CALL refresh_continuous_aggregate('conditions_summary_monthly', '2021-09-01', '2021-10-01'); +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_monthly +ORDER BY month, city; + city | month | min | max +--------+------------+-----+----- + Berlin | 2021-06-01 | 15 | 17 + Moscow | 2021-06-01 | 1 | 34 + Berlin | 2021-07-01 | 18 | 28 + Moscow | 2021-07-01 | 4 | 14 + Berlin | 2021-09-01 | 16 | 40 + Moscow | 2021-09-01 | -40 | 15 +(6 rows) + +SELECT + _timescaledb_internal.to_timestamp(lowest_modified_value) AS lowest, + _timescaledb_internal.to_timestamp(greatest_modified_value) AS greatest +FROM _timescaledb_catalog.continuous_aggs_hypertable_invalidation_log +WHERE hypertable_id = :ht_id; + lowest | greatest +--------+---------- +(0 rows) + +-- Create a real-time aggregate with custom origin - June 2000 +CREATE MATERIALIZED VIEW conditions_summary_rt +WITH (timescaledb.continuous) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month', day, '2000-06-01' :: date) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket; +NOTICE: refreshing continuous aggregate "conditions_summary_rt" +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_rt +ORDER BY month, city; + city | month | min | max +--------+------------+-----+----- + Berlin | 2021-06-01 | 15 | 17 + Moscow | 2021-06-01 | 1 | 34 + Berlin | 2021-07-01 | 18 | 28 + Moscow | 2021-07-01 | 4 | 14 + Berlin | 2021-09-01 | 16 | 40 + Moscow | 2021-09-01 | -40 | 15 +(6 rows) + +-- Add some data to the hypertable and make sure it is visible in the cagg +INSERT INTO conditions (day, city, temperature) VALUES + ('2021-10-01', 'Moscow', 1), + ('2021-10-02', 'Moscow', 2), + ('2021-10-03', 'Moscow', 3), + ('2021-10-04', 'Moscow', 4), + ('2021-10-01', 'Berlin', 5), + ('2021-10-02', 'Berlin', 6), + ('2021-10-03', 'Berlin', 7), + ('2021-10-04', 'Berlin', 8); +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_rt +ORDER BY month, city; + city | month | min | max +--------+------------+-----+----- + Berlin | 2021-06-01 | 15 | 17 + Moscow | 2021-06-01 | 1 | 34 + Berlin | 2021-07-01 | 18 | 28 + Moscow | 2021-07-01 | 4 | 14 + Berlin | 2021-09-01 | 16 | 40 + Moscow | 2021-09-01 | -40 | 15 + Berlin | 2021-10-01 | 5 | 8 + Moscow | 2021-10-01 | 1 | 4 +(8 rows) + +-- Refresh the cagg and make sure that the result of SELECT query didn't change +CALL refresh_continuous_aggregate('conditions_summary_rt', '2021-10-01', '2021-11-01'); +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_rt +ORDER BY month, city; + city | month | min | max +--------+------------+-----+----- + Berlin | 2021-06-01 | 15 | 17 + Moscow | 2021-06-01 | 1 | 34 + Berlin | 2021-07-01 | 18 | 28 + Moscow | 2021-07-01 | 4 | 14 + Berlin | 2021-09-01 | 16 
| 40 + Moscow | 2021-09-01 | -40 | 15 + Berlin | 2021-10-01 | 5 | 8 + Moscow | 2021-10-01 | 1 | 4 +(8 rows) + +-- Add some more data, enable compression, compress the chunks and repeat the test +INSERT INTO conditions (day, city, temperature) VALUES + ('2021-11-01', 'Moscow', 11), + ('2021-11-02', 'Moscow', 12), + ('2021-11-03', 'Moscow', 13), + ('2021-11-04', 'Moscow', 14), + ('2021-11-01', 'Berlin', 15), + ('2021-11-02', 'Berlin', 16), + ('2021-11-03', 'Berlin', 17), + ('2021-11-04', 'Berlin', 18); +ALTER TABLE conditions SET ( + timescaledb.compress, + timescaledb.compress_segmentby = 'city' +); +SELECT compress_chunk(ch) FROM show_chunks('conditions') AS ch; + compress_chunk +----------------------------------------- + _timescaledb_internal._hyper_1_1_chunk + _timescaledb_internal._hyper_1_2_chunk + _timescaledb_internal._hyper_1_3_chunk + _timescaledb_internal._hyper_1_4_chunk + _timescaledb_internal._hyper_1_5_chunk + _timescaledb_internal._hyper_1_6_chunk + _timescaledb_internal._hyper_1_7_chunk + _timescaledb_internal._hyper_1_8_chunk + _timescaledb_internal._hyper_1_9_chunk + _timescaledb_internal._hyper_1_10_chunk + _timescaledb_internal._hyper_1_11_chunk + _timescaledb_internal._hyper_1_12_chunk + _timescaledb_internal._hyper_1_13_chunk + _timescaledb_internal._hyper_1_14_chunk + _timescaledb_internal._hyper_1_16_chunk + _timescaledb_internal._hyper_1_17_chunk + _timescaledb_internal._hyper_1_18_chunk + _timescaledb_internal._hyper_1_19_chunk + _timescaledb_internal._hyper_1_20_chunk + _timescaledb_internal._hyper_1_21_chunk + _timescaledb_internal._hyper_1_22_chunk + _timescaledb_internal._hyper_1_23_chunk + _timescaledb_internal._hyper_1_24_chunk + _timescaledb_internal._hyper_1_25_chunk + _timescaledb_internal._hyper_1_26_chunk + _timescaledb_internal._hyper_1_27_chunk + _timescaledb_internal._hyper_1_28_chunk + _timescaledb_internal._hyper_1_29_chunk + _timescaledb_internal._hyper_1_34_chunk + _timescaledb_internal._hyper_1_35_chunk + _timescaledb_internal._hyper_1_36_chunk + _timescaledb_internal._hyper_1_37_chunk + _timescaledb_internal._hyper_1_38_chunk + _timescaledb_internal._hyper_1_39_chunk + _timescaledb_internal._hyper_1_40_chunk + _timescaledb_internal._hyper_1_41_chunk + _timescaledb_internal._hyper_1_42_chunk + _timescaledb_internal._hyper_1_43_chunk + _timescaledb_internal._hyper_1_44_chunk + _timescaledb_internal._hyper_1_45_chunk + _timescaledb_internal._hyper_1_46_chunk + _timescaledb_internal._hyper_1_47_chunk + _timescaledb_internal._hyper_1_48_chunk + _timescaledb_internal._hyper_1_50_chunk + _timescaledb_internal._hyper_1_51_chunk + _timescaledb_internal._hyper_1_52_chunk + _timescaledb_internal._hyper_1_53_chunk + _timescaledb_internal._hyper_1_54_chunk + _timescaledb_internal._hyper_1_55_chunk + _timescaledb_internal._hyper_1_56_chunk + _timescaledb_internal._hyper_1_57_chunk + _timescaledb_internal._hyper_1_58_chunk + _timescaledb_internal._hyper_1_59_chunk + _timescaledb_internal._hyper_1_60_chunk + _timescaledb_internal._hyper_1_61_chunk + _timescaledb_internal._hyper_1_62_chunk + _timescaledb_internal._hyper_1_63_chunk + _timescaledb_internal._hyper_1_64_chunk + _timescaledb_internal._hyper_1_68_chunk + _timescaledb_internal._hyper_1_69_chunk + _timescaledb_internal._hyper_1_70_chunk + _timescaledb_internal._hyper_1_71_chunk + _timescaledb_internal._hyper_1_73_chunk + _timescaledb_internal._hyper_1_74_chunk + _timescaledb_internal._hyper_1_75_chunk + _timescaledb_internal._hyper_1_76_chunk +(66 rows) + +-- Data for 2021-11 is seen because the cagg 
is real-time +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_rt +ORDER BY month, city; + city | month | min | max +--------+------------+-----+----- + Berlin | 2021-06-01 | 15 | 17 + Moscow | 2021-06-01 | 1 | 34 + Berlin | 2021-07-01 | 18 | 28 + Moscow | 2021-07-01 | 4 | 14 + Berlin | 2021-09-01 | 16 | 40 + Moscow | 2021-09-01 | -40 | 15 + Berlin | 2021-10-01 | 5 | 8 + Moscow | 2021-10-01 | 1 | 4 + Berlin | 2021-11-01 | 15 | 18 + Moscow | 2021-11-01 | 11 | 14 +(10 rows) + +CALL refresh_continuous_aggregate('conditions_summary_rt', '2021-11-01', '2021-12-01'); +-- Data for 2021-11 is seen because the cagg was refreshed +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_rt +ORDER BY month, city; + city | month | min | max +--------+------------+-----+----- + Berlin | 2021-06-01 | 15 | 17 + Moscow | 2021-06-01 | 1 | 34 + Berlin | 2021-07-01 | 18 | 28 + Moscow | 2021-07-01 | 4 | 14 + Berlin | 2021-09-01 | 16 | 40 + Moscow | 2021-09-01 | -40 | 15 + Berlin | 2021-10-01 | 5 | 8 + Moscow | 2021-10-01 | 1 | 4 + Berlin | 2021-11-01 | 15 | 18 + Moscow | 2021-11-01 | 11 | 14 +(10 rows) + +-- Clean up +DROP TABLE conditions CASCADE; +NOTICE: drop cascades to 66 other objects +NOTICE: drop cascades to 7 other objects +NOTICE: drop cascades to 3 other objects +NOTICE: drop cascades to 3 other objects +NOTICE: drop cascades to 5 other objects +-- Test caggs with monthly buckets and custom origin on top of distributed hypertable +\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER +\set DATA_NODE_1 :TEST_DBNAME _1 +\set DATA_NODE_2 :TEST_DBNAME _2 +\set DATA_NODE_3 :TEST_DBNAME _3 +SELECT (add_data_node (name, host => 'localhost', DATABASE => name)).* +FROM (VALUES (:'DATA_NODE_1'), (:'DATA_NODE_2'), (:'DATA_NODE_3')) v (name); + node_name | host | port | database | node_created | database_created | extension_created +----------------------+-----------+-------+----------------------+--------------+------------------+------------------- + db_exp_cagg_origin_1 | localhost | 55432 | db_exp_cagg_origin_1 | t | t | t + db_exp_cagg_origin_2 | localhost | 55432 | db_exp_cagg_origin_2 | t | t | t + db_exp_cagg_origin_3 | localhost | 55432 | db_exp_cagg_origin_3 | t | t | t +(3 rows) + +GRANT USAGE ON FOREIGN SERVER :DATA_NODE_1, :DATA_NODE_2, :DATA_NODE_3 TO PUBLIC; +SET ROLE :ROLE_DEFAULT_PERM_USER; +CREATE TABLE conditions_dist( + day date NOT NULL, + temperature INT NOT NULL); +SELECT table_name FROM create_distributed_hypertable('conditions_dist', 'day', chunk_time_interval => INTERVAL '1 day'); + table_name +----------------- + conditions_dist +(1 row) + +INSERT INTO conditions_dist(day, temperature) +SELECT ts, date_part('month', ts)*100 + date_part('day', ts) +FROM generate_series('2010-01-01' :: date, '2010-03-01' :: date - interval '1 day', '1 day') as ts; +CREATE MATERIALIZED VIEW conditions_dist_1m +WITH (timescaledb.continuous) AS +SELECT + timescaledb_experimental.time_bucket_ng('1 month', day, '2010-01-01') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_dist +GROUP BY bucket; +NOTICE: refreshing continuous aggregate "conditions_dist_1m" +SELECT mat_hypertable_id AS cagg_id, raw_hypertable_id AS ht_id +FROM _timescaledb_catalog.continuous_agg +WHERE user_view_name = 'conditions_dist_1m' +\gset +SELECT bucket_width +FROM _timescaledb_catalog.continuous_agg +WHERE mat_hypertable_id = :cagg_id; + bucket_width +-------------- + -1 +(1 row) + +SELECT experimental, name, bucket_width, origin, timezone +FROM 
_timescaledb_catalog.continuous_aggs_bucket_function +WHERE mat_hypertable_id = :cagg_id; + experimental | name | bucket_width | origin | timezone +--------------+----------------+--------------+--------------------------+---------- + t | time_bucket_ng | @ 1 mon | Fri Jan 01 00:00:00 2010 | +(1 row) + +SELECT * FROM conditions_dist_1m ORDER BY bucket; + bucket | min | max +------------+-----+----- + 01-01-2010 | 101 | 131 + 02-01-2010 | 201 | 228 +(2 rows) + +-- Same test but with non-realtime, NO DATA aggregate and manual refresh +CREATE MATERIALIZED VIEW conditions_dist_1m_manual +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT + timescaledb_experimental.time_bucket_ng('1 month', day, '2005-01-01') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_dist +GROUP BY bucket +WITH NO DATA; +SELECT * FROM conditions_dist_1m_manual ORDER BY bucket; + bucket | min | max +--------+-----+----- +(0 rows) + +CALL refresh_continuous_aggregate('conditions_dist_1m_manual', '2010-01-01', '2010-03-01'); +SELECT * FROM conditions_dist_1m_manual ORDER BY bucket; + bucket | min | max +------------+-----+----- + 01-01-2010 | 101 | 131 + 02-01-2010 | 201 | 228 +(2 rows) + +-- Check invalidation for caggs on top of distributed hypertable +INSERT INTO conditions_dist(day, temperature) +VALUES ('2010-01-15', 999), ('2010-02-15', -999), ('2010-03-01', 15); +SELECT * FROM conditions_dist_1m ORDER BY bucket; + bucket | min | max +------------+-----+----- + 01-01-2010 | 101 | 131 + 02-01-2010 | 201 | 228 + 03-01-2010 | 15 | 15 +(3 rows) + +SELECT * FROM conditions_dist_1m_manual ORDER BY bucket; + bucket | min | max +------------+-----+----- + 01-01-2010 | 101 | 131 + 02-01-2010 | 201 | 228 +(2 rows) + +CALL refresh_continuous_aggregate('conditions_dist_1m', '2010-01-01', '2010-04-01'); +SELECT * FROM conditions_dist_1m ORDER BY bucket; + bucket | min | max +------------+------+----- + 01-01-2010 | 101 | 999 + 02-01-2010 | -999 | 228 + 03-01-2010 | 15 | 15 +(3 rows) + +SELECT * FROM conditions_dist_1m_manual ORDER BY bucket; + bucket | min | max +------------+-----+----- + 01-01-2010 | 101 | 131 + 02-01-2010 | 201 | 228 +(2 rows) + +CALL refresh_continuous_aggregate('conditions_dist_1m_manual', '2010-01-01', '2010-04-01'); +SELECT * FROM conditions_dist_1m ORDER BY bucket; + bucket | min | max +------------+------+----- + 01-01-2010 | 101 | 999 + 02-01-2010 | -999 | 228 + 03-01-2010 | 15 | 15 +(3 rows) + +SELECT * FROM conditions_dist_1m_manual ORDER BY bucket; + bucket | min | max +------------+------+----- + 01-01-2010 | 101 | 999 + 02-01-2010 | -999 | 228 + 03-01-2010 | 15 | 15 +(3 rows) + +-- Compression on top of distributed hypertables +ALTER MATERIALIZED VIEW conditions_dist_1m_manual SET ( timescaledb.compress ); +SELECT compress_chunk(ch) +FROM show_chunks('conditions_dist_1m_manual') ch limit 1; + compress_chunk +------------------------------------------ + _timescaledb_internal._hyper_8_205_chunk +(1 row) + +SELECT * FROM conditions_dist_1m_manual ORDER BY bucket; + bucket | min | max +------------+------+----- + 01-01-2010 | 101 | 999 + 02-01-2010 | -999 | 228 + 03-01-2010 | 15 | 15 +(3 rows) + +-- Clean up +DROP TABLE conditions_dist CASCADE; +NOTICE: drop cascades to 5 other objects +NOTICE: drop cascades to 3 other objects +NOTICE: drop cascades to 3 other objects +\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER +SELECT delete_data_node(name) +FROM (VALUES (:'DATA_NODE_1'), (:'DATA_NODE_2'), (:'DATA_NODE_3')) v (name); + delete_data_node +------------------ + t + 
t + t +(3 rows) + +SET ROLE :ROLE_DEFAULT_PERM_USER; +-- Test the specific code path of creating a CAGG on top of empty hypertable. +CREATE TABLE conditions_empty( + day DATE NOT NULL, + city text NOT NULL, + temperature INT NOT NULL); +SELECT create_hypertable( + 'conditions_empty', 'day', + chunk_time_interval => INTERVAL '1 day' +); + create_hypertable +-------------------------------- + (10,public,conditions_empty,t) +(1 row) + +CREATE MATERIALIZED VIEW conditions_summary_empty +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month', day, '2005-02-01') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_empty +GROUP BY city, bucket; +NOTICE: continuous aggregate "conditions_summary_empty" is already up-to-date +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_empty +ORDER BY month, city; + city | month | min | max +------+-------+-----+----- +(0 rows) + +-- The test above changes the record that gets added to the invalidation log +-- for an empty table. Make sure it doesn't have any unintended side-effects +-- and the refreshing works as expected. +INSERT INTO conditions_empty (day, city, temperature) VALUES + ('2021-06-14', 'Moscow', 26), + ('2021-06-15', 'Moscow', 22), + ('2021-06-16', 'Moscow', 24), + ('2021-06-17', 'Moscow', 24), + ('2021-06-18', 'Moscow', 27), + ('2021-06-19', 'Moscow', 28), + ('2021-06-20', 'Moscow', 30), + ('2021-06-21', 'Moscow', 31), + ('2021-06-22', 'Moscow', 34), + ('2021-06-23', 'Moscow', 34), + ('2021-06-24', 'Moscow', 34), + ('2021-06-25', 'Moscow', 32), + ('2021-06-26', 'Moscow', 32), + ('2021-06-27', 'Moscow', 31); +CALL refresh_continuous_aggregate('conditions_summary_empty', '2021-06-01', '2021-07-01'); +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_empty +ORDER BY month, city; + city | month | min | max +--------+------------+-----+----- + Moscow | 2021-06-01 | 22 | 34 +(1 row) + +-- Clean up +DROP TABLE conditions_empty CASCADE; +NOTICE: drop cascades to 2 other objects +NOTICE: drop cascades to table _timescaledb_internal._hyper_11_225_chunk +-- Make sure add_continuous_aggregate_policy() works +CREATE TABLE conditions_policy( + day DATE NOT NULL, + city text NOT NULL, + temperature INT NOT NULL); +SELECT create_hypertable( + 'conditions_policy', 'day', + chunk_time_interval => INTERVAL '1 day' +); + create_hypertable +--------------------------------- + (12,public,conditions_policy,t) +(1 row) + +INSERT INTO conditions_policy (day, city, temperature) VALUES + ('2021-06-14', 'Moscow', 26), + ('2021-06-15', 'Moscow', 22), + ('2021-06-16', 'Moscow', 24), + ('2021-06-17', 'Moscow', 24), + ('2021-06-18', 'Moscow', 27), + ('2021-06-19', 'Moscow', 28), + ('2021-06-20', 'Moscow', 30), + ('2021-06-21', 'Moscow', 31), + ('2021-06-22', 'Moscow', 34), + ('2021-06-23', 'Moscow', 34), + ('2021-06-24', 'Moscow', 34), + ('2021-06-25', 'Moscow', 32), + ('2021-06-26', 'Moscow', 32), + ('2021-06-27', 'Moscow', 31); +CREATE MATERIALIZED VIEW conditions_summary_policy +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month', day, '2005-03-01') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_policy +GROUP BY city, bucket; +NOTICE: refreshing continuous aggregate "conditions_summary_policy" +SELECT * FROM conditions_summary_policy; + city | bucket | min | max +--------+------------+-----+----- + Moscow 
| 06-01-2021 | 22 | 34 +(1 row) + +\set ON_ERROR_STOP 0 +-- Check for "policy refresh window too small" error +SELECT add_continuous_aggregate_policy('conditions_summary_policy', + -- Historically, 1 month is just a synonym to 30 days here. + -- See interval_to_int64() and interval_to_int128(). + start_offset => INTERVAL '2 months', + end_offset => INTERVAL '1 day', + schedule_interval => INTERVAL '1 hour'); +ERROR: policy refresh window too small +\set ON_ERROR_STOP 1 +SELECT add_continuous_aggregate_policy('conditions_summary_policy', + start_offset => INTERVAL '65 days', + end_offset => INTERVAL '1 day', + schedule_interval => INTERVAL '1 hour'); + add_continuous_aggregate_policy +--------------------------------- + 1000 +(1 row) + +-- Clean up +DROP TABLE conditions_policy CASCADE; +NOTICE: drop cascades to 2 other objects +NOTICE: drop cascades to table _timescaledb_internal._hyper_13_240_chunk +-- Make sure CAGGs with custom origin work for timestamp type +CREATE TABLE conditions_timestamp( + tstamp TIMESTAMP NOT NULL, + city TEXT NOT NULL, + temperature INT NOT NULL); +SELECT create_hypertable( + 'conditions_timestamp', 'tstamp', + chunk_time_interval => INTERVAL '1 day' +); + create_hypertable +------------------------------------ + (14,public,conditions_timestamp,t) +(1 row) + +CREATE MATERIALIZED VIEW conditions_summary_timestamp +WITH (timescaledb.continuous) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('12 hours', tstamp, '2000-06-01 12:00:00') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_timestamp +GROUP BY city, bucket; +NOTICE: continuous aggregate "conditions_summary_timestamp" is already up-to-date +SELECT city, to_char(bucket, 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamp +ORDER BY b, city; + city | b | min | max +------+---+-----+----- +(0 rows) + +-- Add some data to the hypertable and make sure it is visible in the cagg +INSERT INTO conditions_timestamp(tstamp, city, temperature) +SELECT ts, city, (CASE WHEN city = 'Moscow' THEN 20000 ELSE 10000 END) + date_part('day', ts)*100 + date_part('hour', ts) +FROM + generate_series('2010-01-01 00:00:00' :: timestamp, '2010-01-02 00:00:00' :: timestamp - interval '1 hour', '1 hour') as ts, + unnest(array['Moscow', 'Berlin']) as city; +SELECT city, to_char(bucket, 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamp +ORDER BY b, city; + city | b | min | max +--------+---------------------+-------+------- + Berlin | 2010-01-01 00:00:00 | 10100 | 10111 + Moscow | 2010-01-01 00:00:00 | 20100 | 20111 + Berlin | 2010-01-01 12:00:00 | 10112 | 10123 + Moscow | 2010-01-01 12:00:00 | 20112 | 20123 +(4 rows) + +-- Refresh the cagg and make sure that the result of SELECT query didn't change +CALL refresh_continuous_aggregate('conditions_summary_timestamp', '2010-01-01 00:00:00', '2010-01-02 00:00:00'); +SELECT city, to_char(bucket, 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamp +ORDER BY b, city; + city | b | min | max +--------+---------------------+-------+------- + Berlin | 2010-01-01 00:00:00 | 10100 | 10111 + Moscow | 2010-01-01 00:00:00 | 20100 | 20111 + Berlin | 2010-01-01 12:00:00 | 10112 | 10123 + Moscow | 2010-01-01 12:00:00 | 20112 | 20123 +(4 rows) + +-- Add some more data, enable compression, compress the chunks and repeat the test +INSERT INTO conditions_timestamp(tstamp, city, temperature) +SELECT ts, city, (CASE WHEN city = 'Moscow' THEN 20000 ELSE 10000 END) + date_part('day', ts)*100 + date_part('hour', ts) 
+FROM + generate_series('2010-01-02 00:00:00' :: timestamp, '2010-01-03 00:00:00' :: timestamp - interval '1 hour', '1 hour') as ts, + unnest(array['Moscow', 'Berlin']) as city; +ALTER TABLE conditions_timestamp SET ( + timescaledb.compress, + timescaledb.compress_segmentby = 'city' +); +SELECT compress_chunk(ch) FROM show_chunks('conditions_timestamp') AS ch; + compress_chunk +------------------------------------------- + _timescaledb_internal._hyper_14_241_chunk + _timescaledb_internal._hyper_14_243_chunk +(2 rows) + +-- New data is seen because the cagg is real-time +SELECT city, to_char(bucket, 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamp +ORDER BY b, city; + city | b | min | max +--------+---------------------+-------+------- + Berlin | 2010-01-01 00:00:00 | 10100 | 10111 + Moscow | 2010-01-01 00:00:00 | 20100 | 20111 + Berlin | 2010-01-01 12:00:00 | 10112 | 10123 + Moscow | 2010-01-01 12:00:00 | 20112 | 20123 + Berlin | 2010-01-02 00:00:00 | 10200 | 10211 + Moscow | 2010-01-02 00:00:00 | 20200 | 20211 + Berlin | 2010-01-02 12:00:00 | 10212 | 10223 + Moscow | 2010-01-02 12:00:00 | 20212 | 20223 +(8 rows) + +CALL refresh_continuous_aggregate('conditions_summary_timestamp', '2010-01-02 00:00:00', '2010-01-03 00:00:00'); +-- New data is seen because the cagg was refreshed +SELECT city, to_char(bucket, 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamp +ORDER BY b, city; + city | b | min | max +--------+---------------------+-------+------- + Berlin | 2010-01-01 00:00:00 | 10100 | 10111 + Moscow | 2010-01-01 00:00:00 | 20100 | 20111 + Berlin | 2010-01-01 12:00:00 | 10112 | 10123 + Moscow | 2010-01-01 12:00:00 | 20112 | 20123 + Berlin | 2010-01-02 00:00:00 | 10200 | 10211 + Moscow | 2010-01-02 00:00:00 | 20200 | 20211 + Berlin | 2010-01-02 12:00:00 | 10212 | 10223 + Moscow | 2010-01-02 12:00:00 | 20212 | 20223 +(8 rows) + +-- Add a refresh policy +SELECT add_continuous_aggregate_policy('conditions_summary_timestamp', + start_offset => INTERVAL '25 hours', + end_offset => INTERVAL '1 hour', + schedule_interval => INTERVAL '30 minutes'); + add_continuous_aggregate_policy +--------------------------------- + 1001 +(1 row) + +-- Clean up +DROP TABLE conditions_timestamp CASCADE; +NOTICE: drop cascades to 2 other objects +NOTICE: drop cascades to 3 other objects +NOTICE: drop cascades to table _timescaledb_internal._hyper_15_242_chunk +-- Make sure CAGGs with custom origin work for timestamptz type +CREATE TABLE conditions_timestamptz( + tstamp TIMESTAMPTZ NOT NULL, + city TEXT NOT NULL, + temperature INT NOT NULL); +SELECT create_hypertable( + 'conditions_timestamptz', 'tstamp', + chunk_time_interval => INTERVAL '1 day' +); + create_hypertable +-------------------------------------- + (17,public,conditions_timestamptz,t) +(1 row) + +\set ON_ERROR_STOP 0 +-- For monthly buckets origin should be the first day of the month in the given timezone +-- 2020-06-02 00:00:00 MSK == 2020-06-01 21:00:00 UTC +CREATE MATERIALIZED VIEW conditions_summary_timestamptz +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month', tstamp, '2020-06-02 00:00:00 MSK', 'Europe/Moscow') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_timestamptz +GROUP BY city, bucket; +ERROR: for monthly buckets origin must be the first day of the month +-- Make sure buckets like '1 month 15 days' (fixed+variable-sized) are not allowed +CREATE MATERIALIZED VIEW
conditions_summary_timestamptz +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month 15 days', tstamp, '2020-06-01 00:00:00 MSK', 'Europe/Moscow') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_timestamptz +GROUP BY city, bucket; +ERROR: invalid interval specified +\set ON_ERROR_STOP 1 +CREATE MATERIALIZED VIEW conditions_summary_timestamptz +WITH (timescaledb.continuous) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('12 hours', tstamp, '2020-06-01 12:00:00 MSK', 'Europe/Moscow') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_timestamptz +GROUP BY city, bucket; +NOTICE: continuous aggregate "conditions_summary_timestamptz" is already up-to-date +SELECT city, to_char(bucket at time zone 'MSK', 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamptz +ORDER BY b, city; + city | b | min | max +------+---+-----+----- +(0 rows) + +-- Add some data to the hypertable and make sure it is visible in the cagg +INSERT INTO conditions_timestamptz(tstamp, city, temperature) +SELECT ts, city, + (CASE WHEN city = 'Moscow' THEN 20000 ELSE 10000 END) + + date_part('day', ts at time zone 'MSK')*100 + + date_part('hour', ts at time zone 'MSK') +FROM + generate_series('2022-01-01 00:00:00 MSK' :: timestamptz, '2022-01-02 00:00:00 MSK' :: timestamptz - interval '1 hour', '1 hour') as ts, + unnest(array['Moscow', 'Berlin']) as city; +-- Check the data +SELECT to_char(tstamp at time zone 'MSK', 'YYYY-MM-DD HH24:MI:SS') AS ts, city, temperature FROM conditions_timestamptz +ORDER BY ts, city; + ts | city | temperature +---------------------+--------+------------- + 2022-01-01 00:00:00 | Berlin | 10100 + 2022-01-01 00:00:00 | Moscow | 20100 + 2022-01-01 01:00:00 | Berlin | 10101 + 2022-01-01 01:00:00 | Moscow | 20101 + 2022-01-01 02:00:00 | Berlin | 10102 + 2022-01-01 02:00:00 | Moscow | 20102 + 2022-01-01 03:00:00 | Berlin | 10103 + 2022-01-01 03:00:00 | Moscow | 20103 + 2022-01-01 04:00:00 | Berlin | 10104 + 2022-01-01 04:00:00 | Moscow | 20104 + 2022-01-01 05:00:00 | Berlin | 10105 + 2022-01-01 05:00:00 | Moscow | 20105 + 2022-01-01 06:00:00 | Berlin | 10106 + 2022-01-01 06:00:00 | Moscow | 20106 + 2022-01-01 07:00:00 | Berlin | 10107 + 2022-01-01 07:00:00 | Moscow | 20107 + 2022-01-01 08:00:00 | Berlin | 10108 + 2022-01-01 08:00:00 | Moscow | 20108 + 2022-01-01 09:00:00 | Berlin | 10109 + 2022-01-01 09:00:00 | Moscow | 20109 + 2022-01-01 10:00:00 | Berlin | 10110 + 2022-01-01 10:00:00 | Moscow | 20110 + 2022-01-01 11:00:00 | Berlin | 10111 + 2022-01-01 11:00:00 | Moscow | 20111 + 2022-01-01 12:00:00 | Berlin | 10112 + 2022-01-01 12:00:00 | Moscow | 20112 + 2022-01-01 13:00:00 | Berlin | 10113 + 2022-01-01 13:00:00 | Moscow | 20113 + 2022-01-01 14:00:00 | Berlin | 10114 + 2022-01-01 14:00:00 | Moscow | 20114 + 2022-01-01 15:00:00 | Berlin | 10115 + 2022-01-01 15:00:00 | Moscow | 20115 + 2022-01-01 16:00:00 | Berlin | 10116 + 2022-01-01 16:00:00 | Moscow | 20116 + 2022-01-01 17:00:00 | Berlin | 10117 + 2022-01-01 17:00:00 | Moscow | 20117 + 2022-01-01 18:00:00 | Berlin | 10118 + 2022-01-01 18:00:00 | Moscow | 20118 + 2022-01-01 19:00:00 | Berlin | 10119 + 2022-01-01 19:00:00 | Moscow | 20119 + 2022-01-01 20:00:00 | Berlin | 10120 + 2022-01-01 20:00:00 | Moscow | 20120 + 2022-01-01 21:00:00 | Berlin | 10121 + 2022-01-01 21:00:00 | Moscow | 20121 + 2022-01-01 22:00:00 | Berlin | 10122 + 2022-01-01 22:00:00 | Moscow | 20122 + 2022-01-01 23:00:00 | Berlin | 10123 + 
2022-01-01 23:00:00 | Moscow | 20123 +(48 rows) + +SELECT city, to_char(bucket at time zone 'MSK', 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamptz +ORDER BY b, city; + city | b | min | max +--------+---------------------+-------+------- + Berlin | 2022-01-01 00:00:00 | 10100 | 10111 + Moscow | 2022-01-01 00:00:00 | 20100 | 20111 + Berlin | 2022-01-01 12:00:00 | 10112 | 10123 + Moscow | 2022-01-01 12:00:00 | 20112 | 20123 +(4 rows) + +-- Refresh the cagg and make sure that the result of the SELECT query didn't change +CALL refresh_continuous_aggregate('conditions_summary_timestamptz', '2022-01-01 00:00:00 MSK', '2022-01-02 00:00:00 MSK'); +SELECT city, to_char(bucket at time zone 'MSK', 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamptz +ORDER BY b, city; + city | b | min | max +--------+---------------------+-------+------- + Berlin | 2022-01-01 00:00:00 | 10100 | 10111 + Moscow | 2022-01-01 00:00:00 | 20100 | 20111 + Berlin | 2022-01-01 12:00:00 | 10112 | 10123 + Moscow | 2022-01-01 12:00:00 | 20112 | 20123 +(4 rows) + +-- Add some more data, enable compression, compress the chunks and repeat the test +INSERT INTO conditions_timestamptz(tstamp, city, temperature) +SELECT ts, city, + (CASE WHEN city = 'Moscow' THEN 20000 ELSE 10000 END) + + date_part('day', ts at time zone 'MSK')*100 + + date_part('hour', ts at time zone 'MSK') +FROM + generate_series('2022-01-02 00:00:00 MSK' :: timestamptz, '2022-01-03 00:00:00 MSK' :: timestamptz - interval '1 hour', '1 hour') as ts, + unnest(array['Moscow', 'Berlin']) as city; +ALTER TABLE conditions_timestamptz SET ( + timescaledb.compress, + timescaledb.compress_segmentby = 'city' +); +SELECT compress_chunk(ch) FROM show_chunks('conditions_timestamptz') AS ch; + compress_chunk +------------------------------------------- + _timescaledb_internal._hyper_17_246_chunk + _timescaledb_internal._hyper_17_247_chunk + _timescaledb_internal._hyper_17_249_chunk +(3 rows) + +-- New data is seen because the cagg is real-time +SELECT city, to_char(bucket at time zone 'MSK', 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamptz +ORDER BY b, city; + city | b | min | max +--------+---------------------+-------+------- + Berlin | 2022-01-01 00:00:00 | 10100 | 10111 + Moscow | 2022-01-01 00:00:00 | 20100 | 20111 + Berlin | 2022-01-01 12:00:00 | 10112 | 10123 + Moscow | 2022-01-01 12:00:00 | 20112 | 20123 + Berlin | 2022-01-02 00:00:00 | 10200 | 10211 + Moscow | 2022-01-02 00:00:00 | 20200 | 20211 + Berlin | 2022-01-02 12:00:00 | 10212 | 10223 + Moscow | 2022-01-02 12:00:00 | 20212 | 20223 +(8 rows) + +CALL refresh_continuous_aggregate('conditions_summary_timestamptz', '2022-01-02 00:00:00 MSK', '2022-01-03 00:00:00 MSK'); +-- New data is seen because the cagg was refreshed +SELECT city, to_char(bucket at time zone 'MSK', 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamptz +ORDER BY b, city; + city | b | min | max +--------+---------------------+-------+------- + Berlin | 2022-01-01 00:00:00 | 10100 | 10111 + Moscow | 2022-01-01 00:00:00 | 20100 | 20111 + Berlin | 2022-01-01 12:00:00 | 10112 | 10123 + Moscow | 2022-01-01 12:00:00 | 20112 | 20123 + Berlin | 2022-01-02 00:00:00 | 10200 | 10211 + Moscow | 2022-01-02 00:00:00 | 20200 | 20211 + Berlin | 2022-01-02 12:00:00 | 10212 | 10223 + Moscow | 2022-01-02 12:00:00 | 20212 | 20223 +(8 rows) + +-- Add a refresh policy +SELECT add_continuous_aggregate_policy('conditions_summary_timestamptz', + start_offset => INTERVAL '25
hours', + end_offset => INTERVAL '1 hour', + schedule_interval => INTERVAL '30 minutes'); + add_continuous_aggregate_policy +--------------------------------- + 1002 +(1 row) + +-- Clean up +DROP TABLE conditions_timestamptz CASCADE; +NOTICE: drop cascades to 3 other objects +NOTICE: drop cascades to 3 other objects +NOTICE: drop cascades to table _timescaledb_internal._hyper_18_248_chunk diff --git a/tsl/test/expected/cagg_with_timezone.out b/tsl/test/expected/exp_cagg_timezone.out similarity index 98% rename from tsl/test/expected/cagg_with_timezone.out rename to tsl/test/expected/exp_cagg_timezone.out index c3ab985b5..1ba6dfd1f 100644 --- a/tsl/test/expected/cagg_with_timezone.out +++ b/tsl/test/expected/exp_cagg_timezone.out @@ -655,11 +655,11 @@ ORDER by month, city; \set DATA_NODE_3 :TEST_DBNAME _3 SELECT (add_data_node (name, host => 'localhost', DATABASE => name)).* FROM (VALUES (:'DATA_NODE_1'), (:'DATA_NODE_2'), (:'DATA_NODE_3')) v (name); - node_name | host | port | database | node_created | database_created | extension_created --------------------------+-----------+-------+-------------------------+--------------+------------------+------------------- - db_cagg_with_timezone_1 | localhost | 55432 | db_cagg_with_timezone_1 | t | t | t - db_cagg_with_timezone_2 | localhost | 55432 | db_cagg_with_timezone_2 | t | t | t - db_cagg_with_timezone_3 | localhost | 55432 | db_cagg_with_timezone_3 | t | t | t + node_name | host | port | database | node_created | database_created | extension_created +------------------------+-----------+-------+------------------------+--------------+------------------+------------------- + db_exp_cagg_timezone_1 | localhost | 55432 | db_exp_cagg_timezone_1 | t | t | t + db_exp_cagg_timezone_2 | localhost | 55432 | db_exp_cagg_timezone_2 | t | t | t + db_exp_cagg_timezone_3 | localhost | 55432 | db_exp_cagg_timezone_3 | t | t | t (3 rows) GRANT USAGE ON FOREIGN SERVER :DATA_NODE_1, :DATA_NODE_2, :DATA_NODE_3 TO PUBLIC; diff --git a/tsl/test/sql/CMakeLists.txt b/tsl/test/sql/CMakeLists.txt index be5cff73c..f9913192b 100644 --- a/tsl/test/sql/CMakeLists.txt +++ b/tsl/test/sql/CMakeLists.txt @@ -19,8 +19,11 @@ set(TEST_FILES_postgresql cagg_policy.sql cagg_refresh.sql cagg_watermark.sql - cagg_experimental.sql dist_views.sql + exp_cagg_next_gen.sql + exp_cagg_monthly.sql + exp_cagg_origin.sql + exp_cagg_timezone.sql move.sql partialize_finalize.sql reorder.sql @@ -54,8 +57,6 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) cagg_tableam.sql cagg_usage.sql cagg_policy_run.sql - cagg_monthly.sql - cagg_with_timezone.sql data_fetcher.sql data_node_bootstrap.sql data_node.sql diff --git a/tsl/test/sql/cagg_monthly.sql b/tsl/test/sql/exp_cagg_monthly.sql similarity index 100% rename from tsl/test/sql/cagg_monthly.sql rename to tsl/test/sql/exp_cagg_monthly.sql diff --git a/tsl/test/sql/cagg_experimental.sql b/tsl/test/sql/exp_cagg_next_gen.sql similarity index 100% rename from tsl/test/sql/cagg_experimental.sql rename to tsl/test/sql/exp_cagg_next_gen.sql diff --git a/tsl/test/sql/exp_cagg_origin.sql b/tsl/test/sql/exp_cagg_origin.sql new file mode 100644 index 000000000..ed4837638 --- /dev/null +++ b/tsl/test/sql/exp_cagg_origin.sql @@ -0,0 +1,688 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. 
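+
+-- (Illustrative sketch, not part of the original test: the origin
+-- '2000-01-03' used for the weekly cagg below is a Monday, so the weekly
+-- buckets start on Mondays. For example, the standalone query
+--
+--   SELECT timescaledb_experimental.time_bucket_ng(
+--       '7 days', '2021-06-16' :: date, '2000-01-03' :: date);
+--
+-- is expected to return 2021-06-14, the Monday of the week that contains
+-- 2021-06-16.)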
+ +CREATE TABLE conditions( + day DATE NOT NULL, + city text NOT NULL, + temperature INT NOT NULL); + +SELECT create_hypertable( + 'conditions', 'day', + chunk_time_interval => INTERVAL '1 day' +); + +INSERT INTO conditions (day, city, temperature) VALUES + ('2021-06-14', 'Moscow', 26), + ('2021-06-15', 'Moscow', 22), + ('2021-06-16', 'Moscow', 24), + ('2021-06-17', 'Moscow', 24), + ('2021-06-18', 'Moscow', 27), + ('2021-06-19', 'Moscow', 28), + ('2021-06-20', 'Moscow', 30), + ('2021-06-21', 'Moscow', 31), + ('2021-06-22', 'Moscow', 34), + ('2021-06-23', 'Moscow', 34), + ('2021-06-24', 'Moscow', 34), + ('2021-06-25', 'Moscow', 32), + ('2021-06-26', 'Moscow', 32), + ('2021-06-27', 'Moscow', 31); + +\set ON_ERROR_STOP 0 + +-- Make sure NULL can't be specified as an origin +CREATE MATERIALIZED VIEW conditions_summary_weekly +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('7 days', day, null) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket +WITH NO DATA; + +-- Make sure 'infinity' can't be specified as an origin +CREATE MATERIALIZED VIEW conditions_summary_weekly +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('7 days', day, 'infinity' :: date) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket +WITH NO DATA; + +-- For monthly buckets origin should be the first day of the month +CREATE MATERIALIZED VIEW conditions_summary_weekly +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month', day, '2021-06-03') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket +WITH NO DATA; + +-- Make sure buckets like '1 month 15 days' (fixed+variable-sized) are not allowed +CREATE MATERIALIZED VIEW conditions_summary_weekly +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month 15 days', day, '2021-06-01') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket +WITH NO DATA; + +\set ON_ERROR_STOP 1 + +CREATE MATERIALIZED VIEW conditions_summary_weekly +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('7 days', day, '2000-01-03' :: date) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket +WITH NO DATA; + +SELECT to_char(bucket, 'YYYY-MM-DD'), city, min, max +FROM conditions_summary_weekly +ORDER BY bucket; + +SELECT mat_hypertable_id AS cagg_id, raw_hypertable_id AS ht_id +FROM _timescaledb_catalog.continuous_agg +WHERE user_view_name = 'conditions_summary_weekly' +\gset + +-- Make sure this is treated as a variable-sized bucket case +SELECT bucket_width +FROM _timescaledb_catalog.continuous_agg +WHERE mat_hypertable_id = :cagg_id; + +-- Make sure the origin is saved in the catalog table +SELECT experimental, name, bucket_width, origin, timezone +FROM _timescaledb_catalog.continuous_aggs_bucket_function +WHERE mat_hypertable_id = :cagg_id; + +-- Make sure truncation of the refresh window works +\set ON_ERROR_STOP 0 +CALL refresh_continuous_aggregate('conditions_summary_weekly', '2021-06-14', '2021-06-20'); +\set ON_ERROR_STOP 1
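+
+-- (Clarifying note, not part of the original test: with the Monday-based
+-- weekly buckets above, the bucket containing the data is
+-- ['2021-06-14', '2021-06-21'). The window ['2021-06-14', '2021-06-20') in
+-- the CALL above is truncated to whole buckets and ends up covering none,
+-- which is why it is expected to fail, while the exact one-bucket window
+-- used below succeeds.)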
+ +-- Make sure refreshing works +CALL refresh_continuous_aggregate('conditions_summary_weekly', '2021-06-14', '2021-06-21'); +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS week, min, max +FROM conditions_summary_weekly +ORDER BY week, city; + +-- Check the invalidation threshold +SELECT _timescaledb_internal.to_timestamp(watermark) at time zone 'UTC' +FROM _timescaledb_catalog.continuous_aggs_invalidation_threshold +WHERE hypertable_id = :ht_id; + +-- Add some dummy data for two more weeks and call refresh (no invalidations test case) +INSERT INTO conditions (day, city, temperature) +SELECT ts :: date, city, row_number() OVER () +FROM generate_series('2021-06-28' :: date, '2021-07-11', '1 day') as ts, + unnest(array['Moscow', 'Berlin']) as city; + +-- Double-check the generated data +SELECT to_char(day, 'YYYY-MM-DD'), city, temperature +FROM conditions +WHERE day >= '2021-06-28' +ORDER BY city DESC, day; + +-- Make sure the invalidation threshold was unaffected +SELECT _timescaledb_internal.to_timestamp(watermark) at time zone 'UTC' +FROM _timescaledb_catalog.continuous_aggs_invalidation_threshold +WHERE hypertable_id = :ht_id; + +-- Make sure the invalidation log is empty +SELECT + _timescaledb_internal.to_timestamp(lowest_modified_value) AS lowest, + _timescaledb_internal.to_timestamp(greatest_modified_value) AS greatest +FROM _timescaledb_catalog.continuous_aggs_hypertable_invalidation_log +WHERE hypertable_id = :ht_id; + +-- Call refresh +CALL refresh_continuous_aggregate('conditions_summary_weekly', '2021-06-28', '2021-07-12'); + +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS week, min, max +FROM conditions_summary_weekly +ORDER BY week, city; + +-- Make sure the invalidation threshold has changed +SELECT _timescaledb_internal.to_timestamp(watermark) at time zone 'UTC' +FROM _timescaledb_catalog.continuous_aggs_invalidation_threshold +WHERE hypertable_id = :ht_id; + +-- Check if CREATE MATERIALIZED VIEW ... WITH DATA works. +-- Use monthly buckets this time and specify June 2000 as an origin. +CREATE MATERIALIZED VIEW conditions_summary_monthly +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month', day, '2000-06-01' :: date) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket; + +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_monthly +ORDER BY month, city; + +-- Check the invalidation. +-- Step 1/2. Insert some more data, do a refresh and make sure that the +-- invalidation log is empty. +INSERT INTO conditions (day, city, temperature) +SELECT ts :: date, city, row_number() OVER () +FROM generate_series('2021-09-01' :: date, '2021-09-15', '1 day') as ts, + unnest(array['Moscow', 'Berlin']) as city; +CALL refresh_continuous_aggregate('conditions_summary_monthly', '2021-09-01', '2021-10-01'); + +SELECT + _timescaledb_internal.to_timestamp(lowest_modified_value) AS lowest, + _timescaledb_internal.to_timestamp(greatest_modified_value) AS greatest +FROM _timescaledb_catalog.continuous_aggs_hypertable_invalidation_log +WHERE hypertable_id = :ht_id; + +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_monthly +ORDER BY month, city; + +-- Step 2/2. Add more data below the invalidation threshold, make sure that the +-- invalidation log is not empty, then do a refresh.
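+-- (Clarifying note, not part of the original test: "below the invalidation
+-- threshold" means the rows inserted next fall before the hypertable's
+-- invalidation threshold, so the INSERT is recorded in the hypertable
+-- invalidation log. A hypothetical way to verify this directly would be:
+--
+--   SELECT count(*) > 0
+--   FROM _timescaledb_catalog.continuous_aggs_hypertable_invalidation_log
+--   WHERE hypertable_id = :ht_id;
+-- )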
+INSERT INTO conditions (day, city, temperature) +SELECT ts :: date, city, (CASE WHEN city = 'Moscow' THEN -40 ELSE 40 END) +FROM generate_series('2021-09-16' :: date, '2021-09-30', '1 day') as ts, + unnest(array['Moscow', 'Berlin']) as city; + +SELECT + _timescaledb_internal.to_timestamp(lowest_modified_value) at time zone 'UTC' AS lowest, + _timescaledb_internal.to_timestamp(greatest_modified_value) at time zone 'UTC' AS greatest +FROM _timescaledb_catalog.continuous_aggs_hypertable_invalidation_log +WHERE hypertable_id = :ht_id; + +CALL refresh_continuous_aggregate('conditions_summary_monthly', '2021-09-01', '2021-10-01'); + +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_monthly +ORDER BY month, city; + +SELECT + _timescaledb_internal.to_timestamp(lowest_modified_value) AS lowest, + _timescaledb_internal.to_timestamp(greatest_modified_value) AS greatest +FROM _timescaledb_catalog.continuous_aggs_hypertable_invalidation_log +WHERE hypertable_id = :ht_id; + +-- Create a real-time aggregate with a custom origin (June 2000) +CREATE MATERIALIZED VIEW conditions_summary_rt +WITH (timescaledb.continuous) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month', day, '2000-06-01' :: date) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket; + +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_rt +ORDER BY month, city; + +-- Add some data to the hypertable and make sure it is visible in the cagg +INSERT INTO conditions (day, city, temperature) VALUES + ('2021-10-01', 'Moscow', 1), + ('2021-10-02', 'Moscow', 2), + ('2021-10-03', 'Moscow', 3), + ('2021-10-04', 'Moscow', 4), + ('2021-10-01', 'Berlin', 5), + ('2021-10-02', 'Berlin', 6), + ('2021-10-03', 'Berlin', 7), + ('2021-10-04', 'Berlin', 8); + +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_rt +ORDER BY month, city; + +-- Refresh the cagg and make sure that the result of the SELECT query didn't change +CALL refresh_continuous_aggregate('conditions_summary_rt', '2021-10-01', '2021-11-01'); + +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_rt +ORDER BY month, city; + +-- Add some more data, enable compression, compress the chunks and repeat the test + +INSERT INTO conditions (day, city, temperature) VALUES + ('2021-11-01', 'Moscow', 11), + ('2021-11-02', 'Moscow', 12), + ('2021-11-03', 'Moscow', 13), + ('2021-11-04', 'Moscow', 14), + ('2021-11-01', 'Berlin', 15), + ('2021-11-02', 'Berlin', 16), + ('2021-11-03', 'Berlin', 17), + ('2021-11-04', 'Berlin', 18); + +ALTER TABLE conditions SET ( + timescaledb.compress, + timescaledb.compress_segmentby = 'city' +); + +SELECT compress_chunk(ch) FROM show_chunks('conditions') AS ch; + +-- Data for 2021-11 is seen because the cagg is real-time +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_rt +ORDER BY month, city; + +CALL refresh_continuous_aggregate('conditions_summary_rt', '2021-11-01', '2021-12-01'); + +-- Data for 2021-11 is seen because the cagg was refreshed +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_rt +ORDER BY month, city; + +-- Clean up +DROP TABLE conditions CASCADE; + +-- Test caggs with monthly buckets and custom origin on top of a distributed hypertable +\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER +\set DATA_NODE_1 :TEST_DBNAME _1 +\set DATA_NODE_2 :TEST_DBNAME _2 +\set DATA_NODE_3 :TEST_DBNAME _3 + +SELECT
(add_data_node (name, host => 'localhost', DATABASE => name)).* +FROM (VALUES (:'DATA_NODE_1'), (:'DATA_NODE_2'), (:'DATA_NODE_3')) v (name); + +GRANT USAGE ON FOREIGN SERVER :DATA_NODE_1, :DATA_NODE_2, :DATA_NODE_3 TO PUBLIC; + +SET ROLE :ROLE_DEFAULT_PERM_USER; + +CREATE TABLE conditions_dist( + day date NOT NULL, + temperature INT NOT NULL); + +SELECT table_name FROM create_distributed_hypertable('conditions_dist', 'day', chunk_time_interval => INTERVAL '1 day'); + +INSERT INTO conditions_dist(day, temperature) +SELECT ts, date_part('month', ts)*100 + date_part('day', ts) +FROM generate_series('2010-01-01' :: date, '2010-03-01' :: date - interval '1 day', '1 day') as ts; + +CREATE MATERIALIZED VIEW conditions_dist_1m +WITH (timescaledb.continuous) AS +SELECT + timescaledb_experimental.time_bucket_ng('1 month', day, '2010-01-01') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_dist +GROUP BY bucket; + +SELECT mat_hypertable_id AS cagg_id, raw_hypertable_id AS ht_id +FROM _timescaledb_catalog.continuous_agg +WHERE user_view_name = 'conditions_dist_1m' +\gset + +SELECT bucket_width +FROM _timescaledb_catalog.continuous_agg +WHERE mat_hypertable_id = :cagg_id; + +SELECT experimental, name, bucket_width, origin, timezone +FROM _timescaledb_catalog.continuous_aggs_bucket_function +WHERE mat_hypertable_id = :cagg_id; + +SELECT * FROM conditions_dist_1m ORDER BY bucket; + +-- Same test but with a non-realtime, NO DATA aggregate and a manual refresh + +CREATE MATERIALIZED VIEW conditions_dist_1m_manual +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT + timescaledb_experimental.time_bucket_ng('1 month', day, '2005-01-01') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_dist +GROUP BY bucket +WITH NO DATA; + +SELECT * FROM conditions_dist_1m_manual ORDER BY bucket; + +CALL refresh_continuous_aggregate('conditions_dist_1m_manual', '2010-01-01', '2010-03-01'); +SELECT * FROM conditions_dist_1m_manual ORDER BY bucket; + +-- Check invalidation for caggs on top of a distributed hypertable + +INSERT INTO conditions_dist(day, temperature) +VALUES ('2010-01-15', 999), ('2010-02-15', -999), ('2010-03-01', 15); + +SELECT * FROM conditions_dist_1m ORDER BY bucket; +SELECT * FROM conditions_dist_1m_manual ORDER BY bucket; + +CALL refresh_continuous_aggregate('conditions_dist_1m', '2010-01-01', '2010-04-01'); +SELECT * FROM conditions_dist_1m ORDER BY bucket; +SELECT * FROM conditions_dist_1m_manual ORDER BY bucket; + +CALL refresh_continuous_aggregate('conditions_dist_1m_manual', '2010-01-01', '2010-04-01'); +SELECT * FROM conditions_dist_1m ORDER BY bucket; +SELECT * FROM conditions_dist_1m_manual ORDER BY bucket; + +-- Compression on top of distributed hypertables + +ALTER MATERIALIZED VIEW conditions_dist_1m_manual SET ( timescaledb.compress ); + +SELECT compress_chunk(ch) +FROM show_chunks('conditions_dist_1m_manual') ch limit 1; + +SELECT * FROM conditions_dist_1m_manual ORDER BY bucket; + +-- Clean up +DROP TABLE conditions_dist CASCADE; + +\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER +SELECT delete_data_node(name) +FROM (VALUES (:'DATA_NODE_1'), (:'DATA_NODE_2'), (:'DATA_NODE_3')) v (name); +SET ROLE :ROLE_DEFAULT_PERM_USER; + +-- Test the specific code path of creating a CAGG on top of an empty hypertable.
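+-- (Note, not part of the original test: since conditions_empty contains no
+-- rows yet, the CREATE MATERIALIZED VIEW below is expected to report
+--
+--   NOTICE:  continuous aggregate "conditions_summary_empty" is already up-to-date
+--
+-- instead of materializing anything, and the first SELECT from the cagg
+-- returns zero rows.)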
+ +CREATE TABLE conditions_empty( + day DATE NOT NULL, + city text NOT NULL, + temperature INT NOT NULL); + +SELECT create_hypertable( + 'conditions_empty', 'day', + chunk_time_interval => INTERVAL '1 day' +); + +CREATE MATERIALIZED VIEW conditions_summary_empty +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month', day, '2005-02-01') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_empty +GROUP BY city, bucket; + +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_empty +ORDER BY month, city; + +-- The test above changes the record that gets added to the invalidation log +-- for an empty table. Make sure it doesn't have any unintended side-effects +-- and that refreshing works as expected. + +INSERT INTO conditions_empty (day, city, temperature) VALUES + ('2021-06-14', 'Moscow', 26), + ('2021-06-15', 'Moscow', 22), + ('2021-06-16', 'Moscow', 24), + ('2021-06-17', 'Moscow', 24), + ('2021-06-18', 'Moscow', 27), + ('2021-06-19', 'Moscow', 28), + ('2021-06-20', 'Moscow', 30), + ('2021-06-21', 'Moscow', 31), + ('2021-06-22', 'Moscow', 34), + ('2021-06-23', 'Moscow', 34), + ('2021-06-24', 'Moscow', 34), + ('2021-06-25', 'Moscow', 32), + ('2021-06-26', 'Moscow', 32), + ('2021-06-27', 'Moscow', 31); + +CALL refresh_continuous_aggregate('conditions_summary_empty', '2021-06-01', '2021-07-01'); + +SELECT city, to_char(bucket, 'YYYY-MM-DD') AS month, min, max +FROM conditions_summary_empty +ORDER BY month, city; + +-- Clean up +DROP TABLE conditions_empty CASCADE; + +-- Make sure add_continuous_aggregate_policy() works + +CREATE TABLE conditions_policy( + day DATE NOT NULL, + city text NOT NULL, + temperature INT NOT NULL); + +SELECT create_hypertable( + 'conditions_policy', 'day', + chunk_time_interval => INTERVAL '1 day' +); + +INSERT INTO conditions_policy (day, city, temperature) VALUES + ('2021-06-14', 'Moscow', 26), + ('2021-06-15', 'Moscow', 22), + ('2021-06-16', 'Moscow', 24), + ('2021-06-17', 'Moscow', 24), + ('2021-06-18', 'Moscow', 27), + ('2021-06-19', 'Moscow', 28), + ('2021-06-20', 'Moscow', 30), + ('2021-06-21', 'Moscow', 31), + ('2021-06-22', 'Moscow', 34), + ('2021-06-23', 'Moscow', 34), + ('2021-06-24', 'Moscow', 34), + ('2021-06-25', 'Moscow', 32), + ('2021-06-26', 'Moscow', 32), + ('2021-06-27', 'Moscow', 31); + +CREATE MATERIALIZED VIEW conditions_summary_policy +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month', day, '2005-03-01') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_policy +GROUP BY city, bucket; + +SELECT * FROM conditions_summary_policy; + +\set ON_ERROR_STOP 0 +-- Check for "policy refresh window too small" error +SELECT add_continuous_aggregate_policy('conditions_summary_policy', + -- Historically, 1 month is just a synonym for 30 days here. + -- See interval_to_int64() and interval_to_int128(). + start_offset => INTERVAL '2 months', + end_offset => INTERVAL '1 day', + schedule_interval => INTERVAL '1 hour'); +\set ON_ERROR_STOP 1
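+
+-- (Illustrative arithmetic, not part of the original test: under the 30-day
+-- reading of '1 month' noted above, the refresh window of the rejected call
+-- is start_offset - end_offset = 60 days - 1 day = 59 days, less than the
+-- two monthly buckets (2 x 30 = 60 days) the policy must cover. With
+-- start_offset => INTERVAL '65 days' the window below is 65 - 1 = 64 days,
+-- so the call succeeds.)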
+ +SELECT add_continuous_aggregate_policy('conditions_summary_policy', + start_offset => INTERVAL '65 days', + end_offset => INTERVAL '1 day', + schedule_interval => INTERVAL '1 hour'); + +-- Clean up +DROP TABLE conditions_policy CASCADE; + +-- Make sure CAGGs with custom origin work for timestamp type + +CREATE TABLE conditions_timestamp( + tstamp TIMESTAMP NOT NULL, + city TEXT NOT NULL, + temperature INT NOT NULL); + +SELECT create_hypertable( + 'conditions_timestamp', 'tstamp', + chunk_time_interval => INTERVAL '1 day' +); + +CREATE MATERIALIZED VIEW conditions_summary_timestamp +WITH (timescaledb.continuous) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('12 hours', tstamp, '2000-06-01 12:00:00') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_timestamp +GROUP BY city, bucket; + +SELECT city, to_char(bucket, 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamp +ORDER BY b, city; + +-- Add some data to the hypertable and make sure it is visible in the cagg +INSERT INTO conditions_timestamp(tstamp, city, temperature) +SELECT ts, city, (CASE WHEN city = 'Moscow' THEN 20000 ELSE 10000 END) + date_part('day', ts)*100 + date_part('hour', ts) +FROM + generate_series('2010-01-01 00:00:00' :: timestamp, '2010-01-02 00:00:00' :: timestamp - interval '1 hour', '1 hour') as ts, + unnest(array['Moscow', 'Berlin']) as city; + +SELECT city, to_char(bucket, 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamp +ORDER BY b, city; + +-- Refresh the cagg and make sure that the result of the SELECT query didn't change +CALL refresh_continuous_aggregate('conditions_summary_timestamp', '2010-01-01 00:00:00', '2010-01-02 00:00:00'); + +SELECT city, to_char(bucket, 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamp +ORDER BY b, city; + +-- Add some more data, enable compression, compress the chunks and repeat the test + +INSERT INTO conditions_timestamp(tstamp, city, temperature) +SELECT ts, city, (CASE WHEN city = 'Moscow' THEN 20000 ELSE 10000 END) + date_part('day', ts)*100 + date_part('hour', ts) +FROM + generate_series('2010-01-02 00:00:00' :: timestamp, '2010-01-03 00:00:00' :: timestamp - interval '1 hour', '1 hour') as ts, + unnest(array['Moscow', 'Berlin']) as city; + +ALTER TABLE conditions_timestamp SET ( + timescaledb.compress, + timescaledb.compress_segmentby = 'city' +); + +SELECT compress_chunk(ch) FROM show_chunks('conditions_timestamp') AS ch; + +-- New data is seen because the cagg is real-time +SELECT city, to_char(bucket, 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamp +ORDER BY b, city; + +CALL refresh_continuous_aggregate('conditions_summary_timestamp', '2010-01-02 00:00:00', '2010-01-03 00:00:00'); + +-- New data is seen because the cagg was refreshed +SELECT city, to_char(bucket, 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamp +ORDER BY b, city; + +-- Add a refresh policy +SELECT add_continuous_aggregate_policy('conditions_summary_timestamp', + start_offset => INTERVAL '25 hours', + end_offset => INTERVAL '1 hour', + schedule_interval => INTERVAL '30 minutes'); + +-- Clean up +DROP TABLE conditions_timestamp CASCADE; + +-- Make sure CAGGs with custom origin work for timestamptz type + +CREATE TABLE conditions_timestamptz( + tstamp TIMESTAMPTZ NOT NULL, + city TEXT NOT NULL, + temperature INT NOT NULL); + +SELECT create_hypertable( + 'conditions_timestamptz', 'tstamp', + chunk_time_interval => INTERVAL '1 day' +); + +\set ON_ERROR_STOP 0 + +-- For monthly buckets origin should be the first day of the month in the given timezone +-- 2020-06-02 00:00:00 MSK == 2020-06-01 21:00:00 UTC
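+-- (Illustrative check, not part of the original test: the equivalence above
+-- can be verified with
+--
+--   SELECT '2020-06-02 00:00:00 MSK' :: timestamptz AT TIME ZONE 'UTC';
+--
+-- In UTC this origin falls on June 1, but in the Europe/Moscow timezone
+-- passed to time_bucket_ng it is June 2, i.e. not the first day of the
+-- month, which is why the CREATE below is expected to fail.)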
+CREATE MATERIALIZED VIEW conditions_summary_timestamptz +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month', tstamp, '2020-06-02 00:00:00 MSK', 'Europe/Moscow') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_timestamptz +GROUP BY city, bucket; + +-- Make sure buckets like '1 month 15 days' (fixed+variable-sized) are not allowed +CREATE MATERIALIZED VIEW conditions_summary_timestamptz +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month 15 days', tstamp, '2020-06-01 00:00:00 MSK', 'Europe/Moscow') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_timestamptz +GROUP BY city, bucket; + +\set ON_ERROR_STOP 1 + +CREATE MATERIALIZED VIEW conditions_summary_timestamptz +WITH (timescaledb.continuous) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('12 hours', tstamp, '2020-06-01 12:00:00 MSK', 'Europe/Moscow') AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions_timestamptz +GROUP BY city, bucket; + +SELECT city, to_char(bucket at time zone 'MSK', 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamptz +ORDER BY b, city; + +-- Add some data to the hypertable and make sure it is visible in the cagg +INSERT INTO conditions_timestamptz(tstamp, city, temperature) +SELECT ts, city, + (CASE WHEN city = 'Moscow' THEN 20000 ELSE 10000 END) + + date_part('day', ts at time zone 'MSK')*100 + + date_part('hour', ts at time zone 'MSK') +FROM + generate_series('2022-01-01 00:00:00 MSK' :: timestamptz, '2022-01-02 00:00:00 MSK' :: timestamptz - interval '1 hour', '1 hour') as ts, + unnest(array['Moscow', 'Berlin']) as city; + +-- Check the data +SELECT to_char(tstamp at time zone 'MSK', 'YYYY-MM-DD HH24:MI:SS') AS ts, city, temperature FROM conditions_timestamptz +ORDER BY ts, city; + +SELECT city, to_char(bucket at time zone 'MSK', 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamptz +ORDER BY b, city; + +-- Refresh the cagg and make sure that the result of the SELECT query didn't change +CALL refresh_continuous_aggregate('conditions_summary_timestamptz', '2022-01-01 00:00:00 MSK', '2022-01-02 00:00:00 MSK'); + +SELECT city, to_char(bucket at time zone 'MSK', 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamptz +ORDER BY b, city; + +-- Add some more data, enable compression, compress the chunks and repeat the test + +INSERT INTO conditions_timestamptz(tstamp, city, temperature) +SELECT ts, city, + (CASE WHEN city = 'Moscow' THEN 20000 ELSE 10000 END) + + date_part('day', ts at time zone 'MSK')*100 + + date_part('hour', ts at time zone 'MSK') +FROM + generate_series('2022-01-02 00:00:00 MSK' :: timestamptz, '2022-01-03 00:00:00 MSK' :: timestamptz - interval '1 hour', '1 hour') as ts, + unnest(array['Moscow', 'Berlin']) as city; + +ALTER TABLE conditions_timestamptz SET ( + timescaledb.compress, + timescaledb.compress_segmentby = 'city' +); + +SELECT compress_chunk(ch) FROM show_chunks('conditions_timestamptz') AS ch; + +-- New data is seen because the cagg is real-time +SELECT city, to_char(bucket at time zone 'MSK', 'YYYY-MM-DD HH24:MI:SS') AS b,
min, max +FROM conditions_summary_timestamptz +ORDER BY b, city; + +CALL refresh_continuous_aggregate('conditions_summary_timestamptz', '2022-01-02 00:00:00 MSK', '2022-01-03 00:00:00 MSK'); + +-- New data is seen because the cagg was refreshed +SELECT city, to_char(bucket at time zone 'MSK', 'YYYY-MM-DD HH24:MI:SS') AS b, min, max +FROM conditions_summary_timestamptz +ORDER BY b, city; + +-- Add a refresh policy +SELECT add_continuous_aggregate_policy('conditions_summary_timestamptz', + start_offset => INTERVAL '25 hours', + end_offset => INTERVAL '1 hour', + schedule_interval => INTERVAL '30 minutes'); + +-- Clean up +DROP TABLE conditions_timestamptz CASCADE; diff --git a/tsl/test/sql/cagg_with_timezone.sql b/tsl/test/sql/exp_cagg_timezone.sql similarity index 100% rename from tsl/test/sql/cagg_with_timezone.sql rename to tsl/test/sql/exp_cagg_timezone.sql