From c488fcdbc9931c271361d0e4997a2b60e6eddf19 Mon Sep 17 00:00:00 2001 From: Sven Klemm Date: Sun, 21 Aug 2022 18:56:45 +0200 Subject: [PATCH] Allow bucketing by month, year, century in time_bucket This patch allows bucketing by month for time_bucket with date, timestamp or timestamptz. When bucketing by month the interval must only contain month components. When using origin together with bucketing by month only the year and month components are honoured. To bucket by month we get the year and month of a date and convert that to the nth month since origin. This allows us to treat month bucketing similar to int bucketing. During this process we ignore the day component and therefore only support bucketing by full months. --- src/time_bucket.c | 136 +++++++++++++++++++++++++++++++----- test/expected/timestamp.out | 74 +++++++++++++++++++- test/sql/timestamp.sql | 38 ++++++++++ 3 files changed, 227 insertions(+), 21 deletions(-) diff --git a/src/time_bucket.c b/src/time_bucket.c index b5334e933..2b20e2389 100644 --- a/src/time_bucket.c +++ b/src/time_bucket.c @@ -95,8 +95,8 @@ ts_int64_bucket(PG_FUNCTION_ARGS) /* * The default origin is Monday 2000-01-03. We don't use PG epoch since it starts on a saturday. - * This makes time-buckets by a week more intuitive and aligns it with - * date_trunc. + * This makes time-buckets by a week more intuitive and aligns it with date_trunc. Since month + * bucketing ignores the day component this makes origin for month buckets 2000-01-01. */ #define DEFAULT_ORIGIN (JAN_3_2000) #define TIME_BUCKET_TS(period, timestamp, result, shift) \ @@ -134,6 +134,49 @@ ts_int64_bucket(PG_FUNCTION_ARGS) (result) += (shift); \ } while (0) +static void +validate_month_bucket(Interval *interval) +{ + /* + * Bucketing by a month and non-month cannot be mixed. + */ + Assert(interval->month); + + if (interval->day || interval->time) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("month intervals cannot have day or time component"))); + } +} + +/* + * To bucket by month we get the year and month of a date and convert + * that to the nth month since origin. This allows us to treat month + * bucketing similar to int bucketing. During this process we ignore + * the day component and therefore only support bucketing by full months. + */ +static DateADT +bucket_month(int32 period, DateADT date, DateADT origin) +{ + int32 year, month, day; + int32 result; + + j2date(date + POSTGRES_EPOCH_JDATE, &year, &month, &day); + int32 timestamp = year * 12 + month - 1; + + j2date(origin + POSTGRES_EPOCH_JDATE, &year, &month, &day); + int32 offset = year * 12 + month - 1; + + TIME_BUCKET(period, timestamp, offset, PG_INT32_MIN, PG_INT32_MAX, result); + + year = result / 12; + month = result % 12; + day = 1; + + return date2j(year, month + 1, day) - POSTGRES_EPOCH_JDATE; +} + /* Returns the period in the same representation as Postgres Timestamps. * Note that this is not our internal representation (microseconds). * Always returns an exact value.*/ @@ -163,14 +206,32 @@ ts_timestamp_bucket(PG_FUNCTION_ARGS) */ Timestamp origin = (PG_NARGS() > 2 ? PG_GETARG_TIMESTAMP(2) : DEFAULT_ORIGIN); Timestamp result; - int64 period = get_interval_period_timestamp_units(interval); if (TIMESTAMP_NOT_FINITE(timestamp)) PG_RETURN_TIMESTAMP(timestamp); - TIME_BUCKET_TS(period, timestamp, result, origin); + if (interval->month) + { + DateADT origin_date = 0; + validate_month_bucket(interval); - PG_RETURN_TIMESTAMP(result); + DateADT date = DatumGetDateADT(DirectFunctionCall1(timestamp_date, PG_GETARG_DATUM(1))); + if (origin != DEFAULT_ORIGIN) + origin_date = + DatumGetDateADT(DirectFunctionCall1(timestamp_date, TimestampGetDatum(origin))); + + date = bucket_month(interval->month, date, origin_date); + + PG_RETURN_DATUM(DirectFunctionCall1(date_timestamp, DateADTGetDatum(date))); + } + else + { + int64 period = get_interval_period_timestamp_units(interval); + + TIME_BUCKET_TS(period, timestamp, result, origin); + + PG_RETURN_TIMESTAMP(result); + } } TS_FUNCTION_INFO_V1(ts_timestamptz_bucket); @@ -187,14 +248,37 @@ ts_timestamptz_bucket(PG_FUNCTION_ARGS) */ TimestampTz origin = (PG_NARGS() > 2 ? PG_GETARG_TIMESTAMPTZ(2) : DEFAULT_ORIGIN); TimestampTz result; - int64 period = get_interval_period_timestamp_units(interval); if (TIMESTAMP_NOT_FINITE(timestamp)) PG_RETURN_TIMESTAMPTZ(timestamp); - TIME_BUCKET_TS(period, timestamp, result, origin); + if (PG_NARGS() > 2) + { + origin = PG_GETARG_TIMESTAMPTZ(2); + } - PG_RETURN_TIMESTAMPTZ(result); + if (interval->month) + { + DateADT origin_date = 0; + validate_month_bucket(interval); + + DateADT date = DatumGetDateADT(DirectFunctionCall1(timestamp_date, PG_GETARG_DATUM(1))); + if (origin != DEFAULT_ORIGIN) + origin_date = + DatumGetDateADT(DirectFunctionCall1(timestamp_date, TimestampTzGetDatum(origin))); + + date = bucket_month(interval->month, date, origin_date); + + PG_RETURN_DATUM(DirectFunctionCall1(date_timestamp, DateADTGetDatum(date))); + } + else + { + int64 period = get_interval_period_timestamp_units(interval); + + TIME_BUCKET_TS(period, timestamp, result, origin); + + PG_RETURN_TIMESTAMPTZ(result); + } } static inline void @@ -223,27 +307,41 @@ ts_date_bucket(PG_FUNCTION_ARGS) { Interval *interval = PG_GETARG_INTERVAL_P(0); DateADT date = PG_GETARG_DATEADT(1); - Timestamp origin = DEFAULT_ORIGIN; + DateADT origin = 0; + Timestamp origin_ts = DEFAULT_ORIGIN; Timestamp timestamp, result; - int64 period = -1; if (DATE_NOT_FINITE(date)) PG_RETURN_DATEADT(date); - period = get_interval_period_timestamp_units(interval); - /* check the period aligns on a date */ - check_period_is_daily(period); - /* convert to timestamp (NOT tz), bucket, convert back to date */ timestamp = DatumGetTimestamp(DirectFunctionCall1(date_timestamp, PG_GETARG_DATUM(1))); - if (PG_NARGS() > 2) - origin = DatumGetTimestamp(DirectFunctionCall1(date_timestamp, PG_GETARG_DATUM(2))); - Assert(!TIMESTAMP_NOT_FINITE(timestamp)); - TIME_BUCKET_TS(period, timestamp, result, origin); + if (PG_NARGS() > 2) + { + origin = PG_GETARG_DATEADT(2); + if (!interval->month) + origin_ts = + DatumGetTimestamp(DirectFunctionCall1(date_timestamp, DateADTGetDatum(origin))); + } - PG_RETURN_DATUM(DirectFunctionCall1(timestamp_date, TimestampGetDatum(result))); + if (interval->month) + { + validate_month_bucket(interval); + + date = bucket_month(interval->month, date, origin); + PG_RETURN_DATEADT(date); + } + else + { + int64 period = get_interval_period_timestamp_units(interval); + /* check the period aligns on a date */ + check_period_is_daily(period); + + TIME_BUCKET_TS(period, timestamp, result, origin_ts); + PG_RETURN_DATUM(DirectFunctionCall1(timestamp_date, TimestampGetDatum(result))); + } } /* when working with time_buckets stored in our catalog, we may not know ahead of time which diff --git a/test/expected/timestamp.out b/test/expected/timestamp.out index f19d2a486..a1b8dbc07 100644 --- a/test/expected/timestamp.out +++ b/test/expected/timestamp.out @@ -307,9 +307,17 @@ FROM unnest(ARRAY[ \set ON_ERROR_STOP 0 SELECT time_bucket(INTERVAL '1 year',TIMESTAMP '2011-01-02 01:01:01.111'); -ERROR: interval defined in terms of month, year, century etc. not supported + time_bucket +-------------------------- + Sat Jan 01 00:00:00 2011 +(1 row) + SELECT time_bucket(INTERVAL '1 month',TIMESTAMP '2011-01-02 01:01:01.111'); -ERROR: interval defined in terms of month, year, century etc. not supported + time_bucket +-------------------------- + Sat Jan 01 00:00:00 2011 +(1 row) + \set ON_ERROR_STOP 1 SELECT time, time_bucket(INTERVAL '5 minute', time) FROM unnest(ARRAY[ @@ -1223,6 +1231,68 @@ FROM unnest(ARRAY[ ]) AS time; ERROR: timestamp out of range \set ON_ERROR_STOP 1 +------------------------------------------- +--- Test time_bucket with month periods --- +------------------------------------------- +SET datestyle TO ISO; +SELECT + time::date, + time_bucket('1 month', time::date) AS "1m", + time_bucket('2 month', time::date) AS "2m", + time_bucket('3 month', time::date) AS "3m", + time_bucket('1 month', time::date, '2000-02-01'::date) AS "1m origin", + time_bucket('2 month', time::date, '2000-02-01'::date) AS "2m origin", + time_bucket('3 month', time::date, '2000-02-01'::date) AS "3m origin" +FROM generate_series('1990-01-03'::date,'1990-06-03'::date,'1month'::interval) time; + time | 1m | 2m | 3m | 1m origin | 2m origin | 3m origin +------------+------------+------------+------------+------------+------------+------------ + 1990-01-03 | 1990-01-01 | 1990-01-01 | 1990-01-01 | 1990-01-01 | 1989-12-01 | 1989-11-01 + 1990-02-03 | 1990-02-01 | 1990-01-01 | 1990-01-01 | 1990-02-01 | 1990-02-01 | 1990-02-01 + 1990-03-03 | 1990-03-01 | 1990-03-01 | 1990-01-01 | 1990-03-01 | 1990-02-01 | 1990-02-01 + 1990-04-03 | 1990-04-01 | 1990-03-01 | 1990-04-01 | 1990-04-01 | 1990-04-01 | 1990-02-01 + 1990-05-03 | 1990-05-01 | 1990-05-01 | 1990-04-01 | 1990-05-01 | 1990-04-01 | 1990-05-01 + 1990-06-03 | 1990-06-01 | 1990-05-01 | 1990-04-01 | 1990-06-01 | 1990-06-01 | 1990-05-01 +(6 rows) + +SELECT + time, + time_bucket('1 month', time) AS "1m", + time_bucket('2 month', time) AS "2m", + time_bucket('3 month', time) AS "3m", + time_bucket('1 month', time, '2000-02-01'::timestamp) AS "1m origin", + time_bucket('2 month', time, '2000-02-01'::timestamp) AS "2m origin", + time_bucket('3 month', time, '2000-02-01'::timestamp) AS "3m origin" +FROM generate_series('1990-01-03'::timestamp,'1990-06-03'::timestamp,'1month'::interval) time; + time | 1m | 2m | 3m | 1m origin | 2m origin | 3m origin +---------------------+---------------------+---------------------+---------------------+---------------------+---------------------+--------------------- + 1990-01-03 00:00:00 | 1990-01-01 00:00:00 | 1990-01-01 00:00:00 | 1990-01-01 00:00:00 | 1990-01-01 00:00:00 | 1989-12-01 00:00:00 | 1989-11-01 00:00:00 + 1990-02-03 00:00:00 | 1990-02-01 00:00:00 | 1990-01-01 00:00:00 | 1990-01-01 00:00:00 | 1990-02-01 00:00:00 | 1990-02-01 00:00:00 | 1990-02-01 00:00:00 + 1990-03-03 00:00:00 | 1990-03-01 00:00:00 | 1990-03-01 00:00:00 | 1990-01-01 00:00:00 | 1990-03-01 00:00:00 | 1990-02-01 00:00:00 | 1990-02-01 00:00:00 + 1990-04-03 00:00:00 | 1990-04-01 00:00:00 | 1990-03-01 00:00:00 | 1990-04-01 00:00:00 | 1990-04-01 00:00:00 | 1990-04-01 00:00:00 | 1990-02-01 00:00:00 + 1990-05-03 00:00:00 | 1990-05-01 00:00:00 | 1990-05-01 00:00:00 | 1990-04-01 00:00:00 | 1990-05-01 00:00:00 | 1990-04-01 00:00:00 | 1990-05-01 00:00:00 + 1990-06-03 00:00:00 | 1990-06-01 00:00:00 | 1990-05-01 00:00:00 | 1990-04-01 00:00:00 | 1990-06-01 00:00:00 | 1990-06-01 00:00:00 | 1990-05-01 00:00:00 +(6 rows) + +SELECT + time, + time_bucket('1 month', time) AS "1m", + time_bucket('2 month', time) AS "2m", + time_bucket('3 month', time) AS "3m", + time_bucket('1 month', time, '2000-02-01'::timestamptz) AS "1m origin", + time_bucket('2 month', time, '2000-02-01'::timestamptz) AS "2m origin", + time_bucket('3 month', time, '2000-02-01'::timestamptz) AS "3m origin" +FROM generate_series('1990-01-03'::timestamptz,'1990-06-03'::timestamptz,'1month'::interval) time; + time | 1m | 2m | 3m | 1m origin | 2m origin | 3m origin +------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------ + 1990-01-03 00:00:00-05 | 1989-12-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1989-11-30 19:00:00-05 | 1989-10-31 19:00:00-05 + 1990-02-03 00:00:00-05 | 1990-01-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1990-01-31 19:00:00-05 | 1990-01-31 19:00:00-05 | 1990-01-31 19:00:00-05 + 1990-03-03 00:00:00-05 | 1990-02-28 19:00:00-05 | 1990-02-28 19:00:00-05 | 1989-12-31 19:00:00-05 | 1990-02-28 19:00:00-05 | 1990-01-31 19:00:00-05 | 1990-01-31 19:00:00-05 + 1990-04-03 00:00:00-04 | 1990-03-31 19:00:00-05 | 1990-02-28 19:00:00-05 | 1990-03-31 19:00:00-05 | 1990-03-31 19:00:00-05 | 1990-03-31 19:00:00-05 | 1990-01-31 19:00:00-05 + 1990-05-03 00:00:00-04 | 1990-04-30 20:00:00-04 | 1990-04-30 20:00:00-04 | 1990-03-31 19:00:00-05 | 1990-04-30 20:00:00-04 | 1990-03-31 19:00:00-05 | 1990-04-30 20:00:00-04 + 1990-06-03 00:00:00-04 | 1990-05-31 20:00:00-04 | 1990-04-30 20:00:00-04 | 1990-03-31 19:00:00-05 | 1990-05-31 20:00:00-04 | 1990-05-31 20:00:00-04 | 1990-04-30 20:00:00-04 +(6 rows) + +RESET datestyle; ------------------------------------------------------------ --- Test timescaledb_experimental.time_bucket_ng function -- ------------------------------------------------------------ diff --git a/test/sql/timestamp.sql b/test/sql/timestamp.sql index 7545b4495..810ba47ea 100644 --- a/test/sql/timestamp.sql +++ b/test/sql/timestamp.sql @@ -598,6 +598,44 @@ FROM unnest(ARRAY[ ]) AS time; \set ON_ERROR_STOP 1 +------------------------------------------- +--- Test time_bucket with month periods --- +------------------------------------------- + +SET datestyle TO ISO; + +SELECT + time::date, + time_bucket('1 month', time::date) AS "1m", + time_bucket('2 month', time::date) AS "2m", + time_bucket('3 month', time::date) AS "3m", + time_bucket('1 month', time::date, '2000-02-01'::date) AS "1m origin", + time_bucket('2 month', time::date, '2000-02-01'::date) AS "2m origin", + time_bucket('3 month', time::date, '2000-02-01'::date) AS "3m origin" +FROM generate_series('1990-01-03'::date,'1990-06-03'::date,'1month'::interval) time; + +SELECT + time, + time_bucket('1 month', time) AS "1m", + time_bucket('2 month', time) AS "2m", + time_bucket('3 month', time) AS "3m", + time_bucket('1 month', time, '2000-02-01'::timestamp) AS "1m origin", + time_bucket('2 month', time, '2000-02-01'::timestamp) AS "2m origin", + time_bucket('3 month', time, '2000-02-01'::timestamp) AS "3m origin" +FROM generate_series('1990-01-03'::timestamp,'1990-06-03'::timestamp,'1month'::interval) time; + +SELECT + time, + time_bucket('1 month', time) AS "1m", + time_bucket('2 month', time) AS "2m", + time_bucket('3 month', time) AS "3m", + time_bucket('1 month', time, '2000-02-01'::timestamptz) AS "1m origin", + time_bucket('2 month', time, '2000-02-01'::timestamptz) AS "2m origin", + time_bucket('3 month', time, '2000-02-01'::timestamptz) AS "3m origin" +FROM generate_series('1990-01-03'::timestamptz,'1990-06-03'::timestamptz,'1month'::interval) time; + +RESET datestyle; + ------------------------------------------------------------ --- Test timescaledb_experimental.time_bucket_ng function -- ------------------------------------------------------------