From 9103d697fb3cdbae3450eaa596d66df4cf289b61 Mon Sep 17 00:00:00 2001 From: Aleksander Alekseev Date: Tue, 11 Jan 2022 12:06:34 +0300 Subject: [PATCH] Don't allow using buckets like '1 month 15 days' + some refactorings This is in fact a backport from the "Buckets with timezones" feature branch. While working on the feature a bug was discovered. We allow creating buckets like '1 month 15 days', i.e. fixed-sized + variable-sized, which is supposed to be forbidden. This patch fixes the bug and also simplifies code a little. timezone_in / timezone_out procedures are used instead of snprintf/scanf. Also, the CAggTimebucketInfo structure was changed slightly. These changes are going to be needed for timezones support anyway. --- src/continuous_agg.c | 22 ++--- src/utils.c | 22 ----- src/utils.h | 2 - tsl/src/continuous_aggs/create.c | 86 ++++++++++++------- .../continuous_aggs_variable_size_buckets.out | 17 +++- .../continuous_aggs_variable_size_buckets.sql | 13 +++ 6 files changed, 91 insertions(+), 71 deletions(-) diff --git a/src/continuous_agg.c b/src/continuous_agg.c index f8f795254..ec990c013 100644 --- a/src/continuous_agg.c +++ b/src/continuous_agg.c @@ -1496,7 +1496,7 @@ ts_continuous_agg_bucket_width(const ContinuousAgg *agg) int32 ts_bucket_function_to_bucket_width_in_months(const ContinuousAggsBucketFunction *bf) { - long long int nmonths; + Interval *interval; /* bucket_function should always be filled for variable buckets */ Assert(NULL != bf); @@ -1508,19 +1508,13 @@ ts_bucket_function_to_bucket_width_in_months(const ContinuousAggsBucketFunction /* variable buckets with specified timezone are not supported */ Assert(0 == strlen(bf->timezone)); - /* - * The definition of int32 (and PRId32) differ across the platforms. - * We have to implicitly cast it to long long to make %lld always work. - */ - if (sscanf(bf->bucket_width, "%lld months", &nmonths) < 1) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("unexpected bucket width \"%s\"", bf->bucket_width))); - } + interval = DatumGetIntervalP( + DirectFunctionCall3(interval_in, CStringGetDatum(bf->bucket_width), InvalidOid, -1)); - /* See the corresponding check in cagg_create() */ - Assert(nmonths <= PG_INT32_MAX); + /* make sure this function is called only in the right contexts */ + Assert(interval->month != 0); + Assert(interval->day == 0); + Assert(interval->time == 0); - return (int32) nmonths; + return interval->month; } diff --git a/src/utils.c b/src/utils.c index 4947ba693..4ac48335d 100644 --- a/src/utils.c +++ b/src/utils.c @@ -204,28 +204,6 @@ ts_interval_value_to_internal(Datum time_val, Oid type_oid) } } -/* - * Similar to ts_interval_value_to_internal() but for monthly interval returns - * the number of months and *months=true. Otherwise returns *months=false and - * the result of ts_interval_value_to_internal() - */ -int64 -ts_interval_value_to_internal_or_months(Datum time_val, Oid type_oid, bool *months) -{ - if (type_oid == INTERVALOID) - { - Interval *interval = DatumGetIntervalP(time_val); - if (interval->month != 0) - { - *months = true; - return interval->month; - } - } - - *months = false; - return ts_interval_value_to_internal(time_val, type_oid); -} - static int64 ts_integer_to_internal(Datum time_val, Oid type_oid) { diff --git a/src/utils.h b/src/utils.h index 48a922c22..70f125aa2 100644 --- a/src/utils.h +++ b/src/utils.h @@ -62,8 +62,6 @@ extern int64 ts_time_value_to_internal_or_infinite(Datum time_val, Oid type_oid, TimevalInfinity *is_infinite_out); extern TSDLLEXPORT int64 ts_interval_value_to_internal(Datum time_val, Oid type_oid); -extern TSDLLEXPORT int64 ts_interval_value_to_internal_or_months(Datum time_val, Oid type_oid, - bool *months); /* * Convert a column from the internal time representation into the specified type diff --git a/tsl/src/continuous_aggs/create.c b/tsl/src/continuous_aggs/create.c index 728e2e72d..781602b89 100644 --- a/tsl/src/continuous_aggs/create.c +++ b/tsl/src/continuous_aggs/create.c @@ -177,8 +177,9 @@ typedef struct CAggTimebucketInfo /* This should also be the column used by time_bucket */ Oid htpartcoltype; int64 htpartcol_interval_len; /* interval length setting for primary partitioning column */ - int64 bucket_width; /* bucket_width of time_bucket */ - bool bucket_width_months; /* if true, bucket_width stores the amount of months */ + int64 bucket_width; /* bucket_width of time_bucket, stores BUCKET_WIDHT_VARIABLE for + variable-sized buckets */ + Interval *interval; /* stores the interval, NULL if not specified */ } CAggTimebucketInfo; typedef struct AggPartCxt @@ -670,17 +671,27 @@ caggtimebucketinfo_init(CAggTimebucketInfo *src, int32 hypertable_id, Oid hypert src->htpartcolno = hypertable_partition_colno; src->htpartcoltype = hypertable_partition_coltype; src->htpartcol_interval_len = hypertable_partition_col_interval; - src->bucket_width = 0; /*invalid value */ + src->bucket_width = 0; /* invalid value */ + src->interval = NULL; /* not specified by default */ } -/* Check if the group-by clauses has exactly 1 time_bucket(.., ) - * where is the hypertable's partitioning column. +/* + * Check if the group-by clauses has exactly 1 time_bucket(.., ) where + * is the hypertable's partitioning column and other invariants. Then fill + * the `bucket_width` and other fields of `tbinfo`. */ static void caggtimebucket_validate(CAggTimebucketInfo *tbinfo, List *groupClause, List *targetList) { ListCell *l; bool found = false; + + /* + * Make sure bucket_width was initialized by caggtimebucketinfo_init(). + * This assumption is used below. + */ + Assert(tbinfo->bucket_width == 0); + foreach (l, groupClause) { SortGroupClause *sgc = (SortGroupClause *) lfirst(l); @@ -721,11 +732,19 @@ caggtimebucket_validate(CAggTimebucketInfo *tbinfo, List *groupClause, List *tar if (IsA(width_arg, Const)) { Const *width = castNode(Const, width_arg); + if (width->consttype == INTERVALOID) + { + tbinfo->interval = DatumGetIntervalP(width->constvalue); + if (tbinfo->interval->month != 0) + tbinfo->bucket_width = BUCKET_WIDTH_VARIABLE; + } - tbinfo->bucket_width = - ts_interval_value_to_internal_or_months(width->constvalue, - width->consttype, - &tbinfo->bucket_width_months); + if (tbinfo->bucket_width != BUCKET_WIDTH_VARIABLE) + { + /* The bucket size is fixed */ + tbinfo->bucket_width = + ts_interval_value_to_internal(width->constvalue, width->consttype); + } } else ereport(ERROR, @@ -736,6 +755,22 @@ caggtimebucket_validate(CAggTimebucketInfo *tbinfo, List *groupClause, List *tar } } + if (tbinfo->bucket_width == BUCKET_WIDTH_VARIABLE) + { + /* variable-sized buckets can be used only with intervals */ + Assert(tbinfo->interval != NULL); + + if ((tbinfo->interval->month != 0) && + ((tbinfo->interval->day != 0) || (tbinfo->interval->time != 0))) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("invalid interval specified"), + errhint("Use either months or days and hours, but not months, days and hours " + "together"))); + } + } + if (!found) elog(ERROR, "continuous aggregate view must include a valid time bucket function"); } @@ -1237,9 +1272,8 @@ get_partialize_funcexpr(Aggref *agg) } /* - * check if the supplied oid belongs to a valid bucket function - * for continuous aggregates - * We only support 2-arg variants of time_bucket + * Check if the supplied OID belongs to a valid bucket function + * for continuous aggregates. */ static bool is_valid_bucketing_function(Oid funcid) @@ -1896,38 +1930,28 @@ cagg_create(const CreateTableAsStmt *create_stmt, ViewStmt *stmt, Query *panquer create_view_for_query(orig_userview_query, dum_rel); /* Step 4 add catalog table entry for the objects we just created */ nspid = RangeVarGetCreationNamespace(stmt->view); + create_cagg_catalog_entry(materialize_hypertable_id, origquery_ht->htid, get_namespace_name(nspid), /*schema name for user view */ stmt->view->relname, part_rel->schemaname, part_rel->relname, - origquery_ht->bucket_width_months ? BUCKET_WIDTH_VARIABLE : - origquery_ht->bucket_width, + origquery_ht->bucket_width, materialized_only, dum_rel->schemaname, dum_rel->relname); - if (origquery_ht->bucket_width_months) + if (origquery_ht->bucket_width == BUCKET_WIDTH_VARIABLE) { - char bucket_width[32]; - /* PostgreSQL's Interval type stores the number of months as int32 */ - if (!(origquery_ht->bucket_width <= PG_INT32_MAX)) - { - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("number of months exceeds PG_INT32_MAX (%d)", PG_INT32_MAX))); - } + const char *bucket_width; /* - * The definition of int64 (and PRId64) differ across the platforms. - * We have to implicitly cast it to long long to make %lld always work. - * pg_snprintf and port.h don't seem to have a solution for this. + * Variable-sized buckets work only with intervals. */ - snprintf(bucket_width, - sizeof(bucket_width), - "%lld months", - (long long int) origquery_ht->bucket_width); + Assert(origquery_ht->interval != NULL); + bucket_width = DatumGetCString( + DirectFunctionCall1(interval_out, IntervalPGetDatum(origquery_ht->interval))); /* * `experimental` = true and `name` = "time_bucket_ng" are hardcoded @@ -2011,7 +2035,7 @@ tsl_process_continuous_agg_viewstmt(Node *node, const char *query_string, void * /* We are creating a refresh window here in a similar way to how it's * done in continuous_agg_refresh. We do not call the PG function - * directly since we want to be able to surpress the output in that + * directly since we want to be able to suppress the output in that * function and adding a 'verbose' parameter to is not useful for a * user. */ relid = get_relname_relid(stmt->into->rel->relname, nspid); diff --git a/tsl/test/expected/continuous_aggs_variable_size_buckets.out b/tsl/test/expected/continuous_aggs_variable_size_buckets.out index 5943d466e..118aff697 100644 --- a/tsl/test/expected/continuous_aggs_variable_size_buckets.out +++ b/tsl/test/expected/continuous_aggs_variable_size_buckets.out @@ -29,6 +29,19 @@ INSERT INTO conditions (day, city, temperature) VALUES ('2021-06-25', 'Moscow', 32), ('2021-06-26', 'Moscow', 32), ('2021-06-27', 'Moscow', 31); +-- Check that buckets like '1 month 15 days' (fixed-sized + variable-sized) are not allowed +\set ON_ERROR_STOP 0 +CREATE MATERIALIZED VIEW conditions_summary +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month 15 days', day) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket +WITH NO DATA; +ERROR: invalid interval specified +\set ON_ERROR_STOP 1 -- Make sure it's possible to create an empty cagg (WITH NO DATA) and -- that all the information about the bucketing function will be saved -- to the TS catalog. @@ -62,7 +75,7 @@ FROM _timescaledb_catalog.continuous_aggs_bucket_function WHERE mat_hypertable_id = :cagg_id; experimental | name | bucket_width | origin | timezone --------------+----------------+--------------+--------+---------- - t | time_bucket_ng | 1 months | | + t | time_bucket_ng | @ 1 mon | | (1 row) -- Check that there is no saved invalidation threshold before any refreshes @@ -1071,7 +1084,7 @@ FROM _timescaledb_catalog.continuous_aggs_bucket_function WHERE mat_hypertable_id = :cagg_id; experimental | name | bucket_width | origin | timezone --------------+----------------+--------------+--------+---------- - t | time_bucket_ng | 1 months | | + t | time_bucket_ng | @ 1 mon | | (1 row) SELECT * FROM conditions_dist_1m ORDER BY bucket; diff --git a/tsl/test/sql/continuous_aggs_variable_size_buckets.sql b/tsl/test/sql/continuous_aggs_variable_size_buckets.sql index 40bcb3f04..41cd5b512 100644 --- a/tsl/test/sql/continuous_aggs_variable_size_buckets.sql +++ b/tsl/test/sql/continuous_aggs_variable_size_buckets.sql @@ -28,6 +28,19 @@ INSERT INTO conditions (day, city, temperature) VALUES ('2021-06-26', 'Moscow', 32), ('2021-06-27', 'Moscow', 31); +-- Check that buckets like '1 month 15 days' (fixed-sized + variable-sized) are not allowed + +\set ON_ERROR_STOP 0 +CREATE MATERIALIZED VIEW conditions_summary +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS +SELECT city, + timescaledb_experimental.time_bucket_ng('1 month 15 days', day) AS bucket, + MIN(temperature), + MAX(temperature) +FROM conditions +GROUP BY city, bucket +WITH NO DATA; +\set ON_ERROR_STOP 1 -- Make sure it's possible to create an empty cagg (WITH NO DATA) and -- that all the information about the bucketing function will be saved