Don't allow using buckets like '1 month 15 days' + some refactorings

This is in fact a backport from the "Buckets with timezones" feature branch.
While working on the feature a bug was discovered. We allow creating buckets
like '1 month 15 days', i.e. fixed-sized + variable-sized, which is supposed to
be forbidden.

This patch fixes the bug and also simplifies code a little. interval_in /
interval_out procedures are used instead of snprintf/sscanf. Also, the
CAggTimebucketInfo structure was changed slightly. These changes are going to be
needed for timezones support anyway.
This commit is contained in:
Aleksander Alekseev 2022-01-11 12:06:34 +03:00 committed by Aleksander Alekseev
parent 2a2b394172
commit 9103d697fb
6 changed files with 91 additions and 71 deletions

View File

@ -1496,7 +1496,7 @@ ts_continuous_agg_bucket_width(const ContinuousAgg *agg)
int32 int32
ts_bucket_function_to_bucket_width_in_months(const ContinuousAggsBucketFunction *bf) ts_bucket_function_to_bucket_width_in_months(const ContinuousAggsBucketFunction *bf)
{ {
long long int nmonths; Interval *interval;
/* bucket_function should always be filled for variable buckets */ /* bucket_function should always be filled for variable buckets */
Assert(NULL != bf); Assert(NULL != bf);
@ -1508,19 +1508,13 @@ ts_bucket_function_to_bucket_width_in_months(const ContinuousAggsBucketFunction
/* variable buckets with specified timezone are not supported */ /* variable buckets with specified timezone are not supported */
Assert(0 == strlen(bf->timezone)); Assert(0 == strlen(bf->timezone));
/* interval = DatumGetIntervalP(
* The definition of int32 (and PRId32) differ across the platforms. DirectFunctionCall3(interval_in, CStringGetDatum(bf->bucket_width), InvalidOid, -1));
* We have to implicitly cast it to long long to make %lld always work.
*/
if (sscanf(bf->bucket_width, "%lld months", &nmonths) < 1)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("unexpected bucket width \"%s\"", bf->bucket_width)));
}
/* See the corresponding check in cagg_create() */ /* make sure this function is called only in the right contexts */
Assert(nmonths <= PG_INT32_MAX); Assert(interval->month != 0);
Assert(interval->day == 0);
Assert(interval->time == 0);
return (int32) nmonths; return interval->month;
} }

View File

@ -204,28 +204,6 @@ ts_interval_value_to_internal(Datum time_val, Oid type_oid)
} }
} }
/*
* Similar to ts_interval_value_to_internal() but for monthly interval returns
* the number of months and *months=true. Otherwise returns *months=false and
* the result of ts_interval_value_to_internal()
*/
int64
ts_interval_value_to_internal_or_months(Datum time_val, Oid type_oid, bool *months)
{
if (type_oid == INTERVALOID)
{
Interval *interval = DatumGetIntervalP(time_val);
if (interval->month != 0)
{
*months = true;
return interval->month;
}
}
*months = false;
return ts_interval_value_to_internal(time_val, type_oid);
}
static int64 static int64
ts_integer_to_internal(Datum time_val, Oid type_oid) ts_integer_to_internal(Datum time_val, Oid type_oid)
{ {

View File

@ -62,8 +62,6 @@ extern int64 ts_time_value_to_internal_or_infinite(Datum time_val, Oid type_oid,
TimevalInfinity *is_infinite_out); TimevalInfinity *is_infinite_out);
extern TSDLLEXPORT int64 ts_interval_value_to_internal(Datum time_val, Oid type_oid); extern TSDLLEXPORT int64 ts_interval_value_to_internal(Datum time_val, Oid type_oid);
extern TSDLLEXPORT int64 ts_interval_value_to_internal_or_months(Datum time_val, Oid type_oid,
bool *months);
/* /*
* Convert a column from the internal time representation into the specified type * Convert a column from the internal time representation into the specified type

View File

@ -177,8 +177,9 @@ typedef struct CAggTimebucketInfo
/* This should also be the column used by time_bucket */ /* This should also be the column used by time_bucket */
Oid htpartcoltype; Oid htpartcoltype;
int64 htpartcol_interval_len; /* interval length setting for primary partitioning column */ int64 htpartcol_interval_len; /* interval length setting for primary partitioning column */
int64 bucket_width; /* bucket_width of time_bucket */ int64 bucket_width; /* bucket_width of time_bucket, stores BUCKET_WIDTH_VARIABLE for
bool bucket_width_months; /* if true, bucket_width stores the amount of months */ variable-sized buckets */
Interval *interval; /* stores the interval, NULL if not specified */
} CAggTimebucketInfo; } CAggTimebucketInfo;
typedef struct AggPartCxt typedef struct AggPartCxt
@ -670,17 +671,27 @@ caggtimebucketinfo_init(CAggTimebucketInfo *src, int32 hypertable_id, Oid hypert
src->htpartcolno = hypertable_partition_colno; src->htpartcolno = hypertable_partition_colno;
src->htpartcoltype = hypertable_partition_coltype; src->htpartcoltype = hypertable_partition_coltype;
src->htpartcol_interval_len = hypertable_partition_col_interval; src->htpartcol_interval_len = hypertable_partition_col_interval;
src->bucket_width = 0; /*invalid value */ src->bucket_width = 0; /* invalid value */
src->interval = NULL; /* not specified by default */
} }
/* Check if the group-by clauses has exactly 1 time_bucket(.., <col>) /*
* where <col> is the hypertable's partitioning column. * Check if the group-by clauses has exactly 1 time_bucket(.., <col>) where
* <col> is the hypertable's partitioning column and other invariants. Then fill
* the `bucket_width` and other fields of `tbinfo`.
*/ */
static void static void
caggtimebucket_validate(CAggTimebucketInfo *tbinfo, List *groupClause, List *targetList) caggtimebucket_validate(CAggTimebucketInfo *tbinfo, List *groupClause, List *targetList)
{ {
ListCell *l; ListCell *l;
bool found = false; bool found = false;
/*
* Make sure bucket_width was initialized by caggtimebucketinfo_init().
* This assumption is used below.
*/
Assert(tbinfo->bucket_width == 0);
foreach (l, groupClause) foreach (l, groupClause)
{ {
SortGroupClause *sgc = (SortGroupClause *) lfirst(l); SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
@ -721,11 +732,19 @@ caggtimebucket_validate(CAggTimebucketInfo *tbinfo, List *groupClause, List *tar
if (IsA(width_arg, Const)) if (IsA(width_arg, Const))
{ {
Const *width = castNode(Const, width_arg); Const *width = castNode(Const, width_arg);
if (width->consttype == INTERVALOID)
{
tbinfo->interval = DatumGetIntervalP(width->constvalue);
if (tbinfo->interval->month != 0)
tbinfo->bucket_width = BUCKET_WIDTH_VARIABLE;
}
tbinfo->bucket_width = if (tbinfo->bucket_width != BUCKET_WIDTH_VARIABLE)
ts_interval_value_to_internal_or_months(width->constvalue, {
width->consttype, /* The bucket size is fixed */
&tbinfo->bucket_width_months); tbinfo->bucket_width =
ts_interval_value_to_internal(width->constvalue, width->consttype);
}
} }
else else
ereport(ERROR, ereport(ERROR,
@ -736,6 +755,22 @@ caggtimebucket_validate(CAggTimebucketInfo *tbinfo, List *groupClause, List *tar
} }
} }
if (tbinfo->bucket_width == BUCKET_WIDTH_VARIABLE)
{
/* variable-sized buckets can be used only with intervals */
Assert(tbinfo->interval != NULL);
if ((tbinfo->interval->month != 0) &&
((tbinfo->interval->day != 0) || (tbinfo->interval->time != 0)))
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("invalid interval specified"),
errhint("Use either months or days and hours, but not months, days and hours "
"together")));
}
}
if (!found) if (!found)
elog(ERROR, "continuous aggregate view must include a valid time bucket function"); elog(ERROR, "continuous aggregate view must include a valid time bucket function");
} }
@ -1237,9 +1272,8 @@ get_partialize_funcexpr(Aggref *agg)
} }
/* /*
* check if the supplied oid belongs to a valid bucket function * Check if the supplied OID belongs to a valid bucket function
* for continuous aggregates * for continuous aggregates.
* We only support 2-arg variants of time_bucket
*/ */
static bool static bool
is_valid_bucketing_function(Oid funcid) is_valid_bucketing_function(Oid funcid)
@ -1896,38 +1930,28 @@ cagg_create(const CreateTableAsStmt *create_stmt, ViewStmt *stmt, Query *panquer
create_view_for_query(orig_userview_query, dum_rel); create_view_for_query(orig_userview_query, dum_rel);
/* Step 4 add catalog table entry for the objects we just created */ /* Step 4 add catalog table entry for the objects we just created */
nspid = RangeVarGetCreationNamespace(stmt->view); nspid = RangeVarGetCreationNamespace(stmt->view);
create_cagg_catalog_entry(materialize_hypertable_id, create_cagg_catalog_entry(materialize_hypertable_id,
origquery_ht->htid, origquery_ht->htid,
get_namespace_name(nspid), /*schema name for user view */ get_namespace_name(nspid), /*schema name for user view */
stmt->view->relname, stmt->view->relname,
part_rel->schemaname, part_rel->schemaname,
part_rel->relname, part_rel->relname,
origquery_ht->bucket_width_months ? BUCKET_WIDTH_VARIABLE : origquery_ht->bucket_width,
origquery_ht->bucket_width,
materialized_only, materialized_only,
dum_rel->schemaname, dum_rel->schemaname,
dum_rel->relname); dum_rel->relname);
if (origquery_ht->bucket_width_months) if (origquery_ht->bucket_width == BUCKET_WIDTH_VARIABLE)
{ {
char bucket_width[32]; const char *bucket_width;
/* PostgreSQL's Interval type stores the number of months as int32 */
if (!(origquery_ht->bucket_width <= PG_INT32_MAX))
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("number of months exceeds PG_INT32_MAX (%d)", PG_INT32_MAX)));
}
/* /*
* The definition of int64 (and PRId64) differ across the platforms. * Variable-sized buckets work only with intervals.
* We have to implicitly cast it to long long to make %lld always work.
* pg_snprintf and port.h don't seem to have a solution for this.
*/ */
snprintf(bucket_width, Assert(origquery_ht->interval != NULL);
sizeof(bucket_width), bucket_width = DatumGetCString(
"%lld months", DirectFunctionCall1(interval_out, IntervalPGetDatum(origquery_ht->interval)));
(long long int) origquery_ht->bucket_width);
/* /*
* `experimental` = true and `name` = "time_bucket_ng" are hardcoded * `experimental` = true and `name` = "time_bucket_ng" are hardcoded
@ -2011,7 +2035,7 @@ tsl_process_continuous_agg_viewstmt(Node *node, const char *query_string, void *
/* We are creating a refresh window here in a similar way to how it's /* We are creating a refresh window here in a similar way to how it's
* done in continuous_agg_refresh. We do not call the PG function * done in continuous_agg_refresh. We do not call the PG function
* directly since we want to be able to surpress the output in that * directly since we want to be able to suppress the output in that
* function and adding a 'verbose' parameter to is not useful for a * function and adding a 'verbose' parameter to is not useful for a
* user. */ * user. */
relid = get_relname_relid(stmt->into->rel->relname, nspid); relid = get_relname_relid(stmt->into->rel->relname, nspid);

View File

@ -29,6 +29,19 @@ INSERT INTO conditions (day, city, temperature) VALUES
('2021-06-25', 'Moscow', 32), ('2021-06-25', 'Moscow', 32),
('2021-06-26', 'Moscow', 32), ('2021-06-26', 'Moscow', 32),
('2021-06-27', 'Moscow', 31); ('2021-06-27', 'Moscow', 31);
-- Check that buckets like '1 month 15 days' (fixed-sized + variable-sized) are not allowed
\set ON_ERROR_STOP 0
CREATE MATERIALIZED VIEW conditions_summary
WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS
SELECT city,
timescaledb_experimental.time_bucket_ng('1 month 15 days', day) AS bucket,
MIN(temperature),
MAX(temperature)
FROM conditions
GROUP BY city, bucket
WITH NO DATA;
ERROR: invalid interval specified
\set ON_ERROR_STOP 1
-- Make sure it's possible to create an empty cagg (WITH NO DATA) and -- Make sure it's possible to create an empty cagg (WITH NO DATA) and
-- that all the information about the bucketing function will be saved -- that all the information about the bucketing function will be saved
-- to the TS catalog. -- to the TS catalog.
@ -62,7 +75,7 @@ FROM _timescaledb_catalog.continuous_aggs_bucket_function
WHERE mat_hypertable_id = :cagg_id; WHERE mat_hypertable_id = :cagg_id;
experimental | name | bucket_width | origin | timezone experimental | name | bucket_width | origin | timezone
--------------+----------------+--------------+--------+---------- --------------+----------------+--------------+--------+----------
t | time_bucket_ng | 1 months | | t | time_bucket_ng | @ 1 mon | |
(1 row) (1 row)
-- Check that there is no saved invalidation threshold before any refreshes -- Check that there is no saved invalidation threshold before any refreshes
@ -1071,7 +1084,7 @@ FROM _timescaledb_catalog.continuous_aggs_bucket_function
WHERE mat_hypertable_id = :cagg_id; WHERE mat_hypertable_id = :cagg_id;
experimental | name | bucket_width | origin | timezone experimental | name | bucket_width | origin | timezone
--------------+----------------+--------------+--------+---------- --------------+----------------+--------------+--------+----------
t | time_bucket_ng | 1 months | | t | time_bucket_ng | @ 1 mon | |
(1 row) (1 row)
SELECT * FROM conditions_dist_1m ORDER BY bucket; SELECT * FROM conditions_dist_1m ORDER BY bucket;

View File

@ -28,6 +28,19 @@ INSERT INTO conditions (day, city, temperature) VALUES
('2021-06-26', 'Moscow', 32), ('2021-06-26', 'Moscow', 32),
('2021-06-27', 'Moscow', 31); ('2021-06-27', 'Moscow', 31);
-- Check that buckets like '1 month 15 days' (fixed-sized + variable-sized) are not allowed
\set ON_ERROR_STOP 0
CREATE MATERIALIZED VIEW conditions_summary
WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS
SELECT city,
timescaledb_experimental.time_bucket_ng('1 month 15 days', day) AS bucket,
MIN(temperature),
MAX(temperature)
FROM conditions
GROUP BY city, bucket
WITH NO DATA;
\set ON_ERROR_STOP 1
-- Make sure it's possible to create an empty cagg (WITH NO DATA) and -- Make sure it's possible to create an empty cagg (WITH NO DATA) and
-- that all the information about the bucketing function will be saved -- that all the information about the bucketing function will be saved