Allow bucketing by month, year, century in time_bucket

This patch allows bucketing by month for time_bucket with date,
timestamp or timestamptz. When bucketing by month the interval
must only contain month components. When using origin together
with bucketing by month only the year and month components are
honoured.

To bucket by month we get the year and month of a date and convert
that to the nth month since origin. This allows us to treat month
bucketing similar to int bucketing. During this process we ignore
the day component and therefore only support bucketing by full months.
This commit is contained in:
Sven Klemm 2022-08-21 18:56:45 +02:00 committed by Sven Klemm
parent 82fc2cac69
commit c488fcdbc9
3 changed files with 227 additions and 21 deletions

View File

@ -95,8 +95,8 @@ ts_int64_bucket(PG_FUNCTION_ARGS)
/*
* The default origin is Monday 2000-01-03. We don't use PG epoch since it starts on a saturday.
* This makes time-buckets by a week more intuitive and aligns it with
* date_trunc.
* This makes time-buckets by a week more intuitive and aligns it with date_trunc. Since month
* bucketing ignores the day component this makes origin for month buckets 2000-01-01.
*/
#define DEFAULT_ORIGIN (JAN_3_2000)
#define TIME_BUCKET_TS(period, timestamp, result, shift) \
@ -134,6 +134,49 @@ ts_int64_bucket(PG_FUNCTION_ARGS)
(result) += (shift); \
} while (0)
static void
validate_month_bucket(Interval *interval)
{
/*
* Bucketing by a month and non-month cannot be mixed.
*/
Assert(interval->month);
if (interval->day || interval->time)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("month intervals cannot have day or time component")));
}
}
/*
* To bucket by month we get the year and month of a date and convert
* that to the nth month since origin. This allows us to treat month
* bucketing similar to int bucketing. During this process we ignore
* the day component and therefore only support bucketing by full months.
*/
static DateADT
bucket_month(int32 period, DateADT date, DateADT origin)
{
int32 year, month, day;
int32 result;
j2date(date + POSTGRES_EPOCH_JDATE, &year, &month, &day);
int32 timestamp = year * 12 + month - 1;
j2date(origin + POSTGRES_EPOCH_JDATE, &year, &month, &day);
int32 offset = year * 12 + month - 1;
TIME_BUCKET(period, timestamp, offset, PG_INT32_MIN, PG_INT32_MAX, result);
year = result / 12;
month = result % 12;
day = 1;
return date2j(year, month + 1, day) - POSTGRES_EPOCH_JDATE;
}
/* Returns the period in the same representation as Postgres Timestamps.
* Note that this is not our internal representation (microseconds).
* Always returns an exact value.*/
@ -163,14 +206,32 @@ ts_timestamp_bucket(PG_FUNCTION_ARGS)
*/
Timestamp origin = (PG_NARGS() > 2 ? PG_GETARG_TIMESTAMP(2) : DEFAULT_ORIGIN);
Timestamp result;
int64 period = get_interval_period_timestamp_units(interval);
if (TIMESTAMP_NOT_FINITE(timestamp))
PG_RETURN_TIMESTAMP(timestamp);
if (interval->month)
{
DateADT origin_date = 0;
validate_month_bucket(interval);
DateADT date = DatumGetDateADT(DirectFunctionCall1(timestamp_date, PG_GETARG_DATUM(1)));
if (origin != DEFAULT_ORIGIN)
origin_date =
DatumGetDateADT(DirectFunctionCall1(timestamp_date, TimestampGetDatum(origin)));
date = bucket_month(interval->month, date, origin_date);
PG_RETURN_DATUM(DirectFunctionCall1(date_timestamp, DateADTGetDatum(date)));
}
else
{
int64 period = get_interval_period_timestamp_units(interval);
TIME_BUCKET_TS(period, timestamp, result, origin);
PG_RETURN_TIMESTAMP(result);
}
}
TS_FUNCTION_INFO_V1(ts_timestamptz_bucket);
@ -187,14 +248,37 @@ ts_timestamptz_bucket(PG_FUNCTION_ARGS)
*/
TimestampTz origin = (PG_NARGS() > 2 ? PG_GETARG_TIMESTAMPTZ(2) : DEFAULT_ORIGIN);
TimestampTz result;
int64 period = get_interval_period_timestamp_units(interval);
if (TIMESTAMP_NOT_FINITE(timestamp))
PG_RETURN_TIMESTAMPTZ(timestamp);
if (PG_NARGS() > 2)
{
origin = PG_GETARG_TIMESTAMPTZ(2);
}
if (interval->month)
{
DateADT origin_date = 0;
validate_month_bucket(interval);
DateADT date = DatumGetDateADT(DirectFunctionCall1(timestamp_date, PG_GETARG_DATUM(1)));
if (origin != DEFAULT_ORIGIN)
origin_date =
DatumGetDateADT(DirectFunctionCall1(timestamp_date, TimestampTzGetDatum(origin)));
date = bucket_month(interval->month, date, origin_date);
PG_RETURN_DATUM(DirectFunctionCall1(date_timestamp, DateADTGetDatum(date)));
}
else
{
int64 period = get_interval_period_timestamp_units(interval);
TIME_BUCKET_TS(period, timestamp, result, origin);
PG_RETURN_TIMESTAMPTZ(result);
}
}
static inline void
@ -223,27 +307,41 @@ ts_date_bucket(PG_FUNCTION_ARGS)
{
Interval *interval = PG_GETARG_INTERVAL_P(0);
DateADT date = PG_GETARG_DATEADT(1);
Timestamp origin = DEFAULT_ORIGIN;
DateADT origin = 0;
Timestamp origin_ts = DEFAULT_ORIGIN;
Timestamp timestamp, result;
int64 period = -1;
if (DATE_NOT_FINITE(date))
PG_RETURN_DATEADT(date);
period = get_interval_period_timestamp_units(interval);
/* convert to timestamp (NOT tz), bucket, convert back to date */
timestamp = DatumGetTimestamp(DirectFunctionCall1(date_timestamp, PG_GETARG_DATUM(1)));
Assert(!TIMESTAMP_NOT_FINITE(timestamp));
if (PG_NARGS() > 2)
{
origin = PG_GETARG_DATEADT(2);
if (!interval->month)
origin_ts =
DatumGetTimestamp(DirectFunctionCall1(date_timestamp, DateADTGetDatum(origin)));
}
if (interval->month)
{
validate_month_bucket(interval);
date = bucket_month(interval->month, date, origin);
PG_RETURN_DATEADT(date);
}
else
{
int64 period = get_interval_period_timestamp_units(interval);
/* check the period aligns on a date */
check_period_is_daily(period);
/* convert to timestamp (NOT tz), bucket, convert back to date */
timestamp = DatumGetTimestamp(DirectFunctionCall1(date_timestamp, PG_GETARG_DATUM(1)));
if (PG_NARGS() > 2)
origin = DatumGetTimestamp(DirectFunctionCall1(date_timestamp, PG_GETARG_DATUM(2)));
Assert(!TIMESTAMP_NOT_FINITE(timestamp));
TIME_BUCKET_TS(period, timestamp, result, origin);
TIME_BUCKET_TS(period, timestamp, result, origin_ts);
PG_RETURN_DATUM(DirectFunctionCall1(timestamp_date, TimestampGetDatum(result)));
}
}
/* when working with time_buckets stored in our catalog, we may not know ahead of time which

View File

@ -307,9 +307,17 @@ FROM unnest(ARRAY[
\set ON_ERROR_STOP 0
SELECT time_bucket(INTERVAL '1 year',TIMESTAMP '2011-01-02 01:01:01.111');
ERROR: interval defined in terms of month, year, century etc. not supported
time_bucket
--------------------------
Sat Jan 01 00:00:00 2011
(1 row)
SELECT time_bucket(INTERVAL '1 month',TIMESTAMP '2011-01-02 01:01:01.111');
ERROR: interval defined in terms of month, year, century etc. not supported
time_bucket
--------------------------
Sat Jan 01 00:00:00 2011
(1 row)
\set ON_ERROR_STOP 1
SELECT time, time_bucket(INTERVAL '5 minute', time)
FROM unnest(ARRAY[
@ -1223,6 +1231,68 @@ FROM unnest(ARRAY[
]) AS time;
ERROR: timestamp out of range
\set ON_ERROR_STOP 1
-------------------------------------------
--- Test time_bucket with month periods ---
-------------------------------------------
SET datestyle TO ISO;
SELECT
time::date,
time_bucket('1 month', time::date) AS "1m",
time_bucket('2 month', time::date) AS "2m",
time_bucket('3 month', time::date) AS "3m",
time_bucket('1 month', time::date, '2000-02-01'::date) AS "1m origin",
time_bucket('2 month', time::date, '2000-02-01'::date) AS "2m origin",
time_bucket('3 month', time::date, '2000-02-01'::date) AS "3m origin"
FROM generate_series('1990-01-03'::date,'1990-06-03'::date,'1month'::interval) time;
time | 1m | 2m | 3m | 1m origin | 2m origin | 3m origin
------------+------------+------------+------------+------------+------------+------------
1990-01-03 | 1990-01-01 | 1990-01-01 | 1990-01-01 | 1990-01-01 | 1989-12-01 | 1989-11-01
1990-02-03 | 1990-02-01 | 1990-01-01 | 1990-01-01 | 1990-02-01 | 1990-02-01 | 1990-02-01
1990-03-03 | 1990-03-01 | 1990-03-01 | 1990-01-01 | 1990-03-01 | 1990-02-01 | 1990-02-01
1990-04-03 | 1990-04-01 | 1990-03-01 | 1990-04-01 | 1990-04-01 | 1990-04-01 | 1990-02-01
1990-05-03 | 1990-05-01 | 1990-05-01 | 1990-04-01 | 1990-05-01 | 1990-04-01 | 1990-05-01
1990-06-03 | 1990-06-01 | 1990-05-01 | 1990-04-01 | 1990-06-01 | 1990-06-01 | 1990-05-01
(6 rows)
SELECT
time,
time_bucket('1 month', time) AS "1m",
time_bucket('2 month', time) AS "2m",
time_bucket('3 month', time) AS "3m",
time_bucket('1 month', time, '2000-02-01'::timestamp) AS "1m origin",
time_bucket('2 month', time, '2000-02-01'::timestamp) AS "2m origin",
time_bucket('3 month', time, '2000-02-01'::timestamp) AS "3m origin"
FROM generate_series('1990-01-03'::timestamp,'1990-06-03'::timestamp,'1month'::interval) time;
time | 1m | 2m | 3m | 1m origin | 2m origin | 3m origin
---------------------+---------------------+---------------------+---------------------+---------------------+---------------------+---------------------
1990-01-03 00:00:00 | 1990-01-01 00:00:00 | 1990-01-01 00:00:00 | 1990-01-01 00:00:00 | 1990-01-01 00:00:00 | 1989-12-01 00:00:00 | 1989-11-01 00:00:00
1990-02-03 00:00:00 | 1990-02-01 00:00:00 | 1990-01-01 00:00:00 | 1990-01-01 00:00:00 | 1990-02-01 00:00:00 | 1990-02-01 00:00:00 | 1990-02-01 00:00:00
1990-03-03 00:00:00 | 1990-03-01 00:00:00 | 1990-03-01 00:00:00 | 1990-01-01 00:00:00 | 1990-03-01 00:00:00 | 1990-02-01 00:00:00 | 1990-02-01 00:00:00
1990-04-03 00:00:00 | 1990-04-01 00:00:00 | 1990-03-01 00:00:00 | 1990-04-01 00:00:00 | 1990-04-01 00:00:00 | 1990-04-01 00:00:00 | 1990-02-01 00:00:00
1990-05-03 00:00:00 | 1990-05-01 00:00:00 | 1990-05-01 00:00:00 | 1990-04-01 00:00:00 | 1990-05-01 00:00:00 | 1990-04-01 00:00:00 | 1990-05-01 00:00:00
1990-06-03 00:00:00 | 1990-06-01 00:00:00 | 1990-05-01 00:00:00 | 1990-04-01 00:00:00 | 1990-06-01 00:00:00 | 1990-06-01 00:00:00 | 1990-05-01 00:00:00
(6 rows)
SELECT
time,
time_bucket('1 month', time) AS "1m",
time_bucket('2 month', time) AS "2m",
time_bucket('3 month', time) AS "3m",
time_bucket('1 month', time, '2000-02-01'::timestamptz) AS "1m origin",
time_bucket('2 month', time, '2000-02-01'::timestamptz) AS "2m origin",
time_bucket('3 month', time, '2000-02-01'::timestamptz) AS "3m origin"
FROM generate_series('1990-01-03'::timestamptz,'1990-06-03'::timestamptz,'1month'::interval) time;
time | 1m | 2m | 3m | 1m origin | 2m origin | 3m origin
------------------------+------------------------+------------------------+------------------------+------------------------+------------------------+------------------------
1990-01-03 00:00:00-05 | 1989-12-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1989-11-30 19:00:00-05 | 1989-10-31 19:00:00-05
1990-02-03 00:00:00-05 | 1990-01-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1989-12-31 19:00:00-05 | 1990-01-31 19:00:00-05 | 1990-01-31 19:00:00-05 | 1990-01-31 19:00:00-05
1990-03-03 00:00:00-05 | 1990-02-28 19:00:00-05 | 1990-02-28 19:00:00-05 | 1989-12-31 19:00:00-05 | 1990-02-28 19:00:00-05 | 1990-01-31 19:00:00-05 | 1990-01-31 19:00:00-05
1990-04-03 00:00:00-04 | 1990-03-31 19:00:00-05 | 1990-02-28 19:00:00-05 | 1990-03-31 19:00:00-05 | 1990-03-31 19:00:00-05 | 1990-03-31 19:00:00-05 | 1990-01-31 19:00:00-05
1990-05-03 00:00:00-04 | 1990-04-30 20:00:00-04 | 1990-04-30 20:00:00-04 | 1990-03-31 19:00:00-05 | 1990-04-30 20:00:00-04 | 1990-03-31 19:00:00-05 | 1990-04-30 20:00:00-04
1990-06-03 00:00:00-04 | 1990-05-31 20:00:00-04 | 1990-04-30 20:00:00-04 | 1990-03-31 19:00:00-05 | 1990-05-31 20:00:00-04 | 1990-05-31 20:00:00-04 | 1990-04-30 20:00:00-04
(6 rows)
RESET datestyle;
------------------------------------------------------------
--- Test timescaledb_experimental.time_bucket_ng function --
------------------------------------------------------------

View File

@ -598,6 +598,44 @@ FROM unnest(ARRAY[
]) AS time;
\set ON_ERROR_STOP 1
-------------------------------------------
--- Test time_bucket with month periods ---
-------------------------------------------
SET datestyle TO ISO;
SELECT
time::date,
time_bucket('1 month', time::date) AS "1m",
time_bucket('2 month', time::date) AS "2m",
time_bucket('3 month', time::date) AS "3m",
time_bucket('1 month', time::date, '2000-02-01'::date) AS "1m origin",
time_bucket('2 month', time::date, '2000-02-01'::date) AS "2m origin",
time_bucket('3 month', time::date, '2000-02-01'::date) AS "3m origin"
FROM generate_series('1990-01-03'::date,'1990-06-03'::date,'1month'::interval) time;
SELECT
time,
time_bucket('1 month', time) AS "1m",
time_bucket('2 month', time) AS "2m",
time_bucket('3 month', time) AS "3m",
time_bucket('1 month', time, '2000-02-01'::timestamp) AS "1m origin",
time_bucket('2 month', time, '2000-02-01'::timestamp) AS "2m origin",
time_bucket('3 month', time, '2000-02-01'::timestamp) AS "3m origin"
FROM generate_series('1990-01-03'::timestamp,'1990-06-03'::timestamp,'1month'::interval) time;
SELECT
time,
time_bucket('1 month', time) AS "1m",
time_bucket('2 month', time) AS "2m",
time_bucket('3 month', time) AS "3m",
time_bucket('1 month', time, '2000-02-01'::timestamptz) AS "1m origin",
time_bucket('2 month', time, '2000-02-01'::timestamptz) AS "2m origin",
time_bucket('3 month', time, '2000-02-01'::timestamptz) AS "3m origin"
FROM generate_series('1990-01-03'::timestamptz,'1990-06-03'::timestamptz,'1month'::interval) time;
RESET datestyle;
------------------------------------------------------------
--- Test timescaledb_experimental.time_bucket_ng function --
------------------------------------------------------------