mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-28 01:30:29 +08:00
Add usage test for continuous aggs
This adds a usage test that serves to document the user-facing interface for continuous aggregates. It could serve to educate the user as well as provide examples. It should match the functional spec.
This commit is contained in:
parent
321944928d
commit
15e65bf7ed
264
tsl/test/expected/continuous_aggs_usage.out
Normal file
264
tsl/test/expected/continuous_aggs_usage.out
Normal file
@ -0,0 +1,264 @@
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
-- TEST SETUP --
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
-- stop the continous aggregate background workers from interfering, nothing will be scheduled.
|
||||
-- In normal execution you wouldn't do this, but for tests we need to be reproducible regardless of what the scheduler does.
|
||||
SELECT _timescaledb_internal.stop_background_workers();
|
||||
stop_background_workers
|
||||
-------------------------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
|
||||
\set ON_ERROR_STOP 0
|
||||
-- START OF USAGE TEST --
|
||||
--First create your hypertable
|
||||
CREATE TABLE device_readings (
|
||||
observation_time TIMESTAMPTZ NOT NULL,
|
||||
device_id TEXT NOT NULL,
|
||||
metric DOUBLE PRECISION NOT NULL,
|
||||
PRIMARY KEY(observation_time, device_id)
|
||||
);
|
||||
SELECT table_name FROM create_hypertable('device_readings', 'observation_time');
|
||||
table_name
|
||||
-----------------
|
||||
device_readings
|
||||
(1 row)
|
||||
|
||||
--Next, create your continuous aggregate view
|
||||
CREATE VIEW device_summary
|
||||
WITH (timescaledb.continuous) --This flag is what makes the view continuous
|
||||
AS
|
||||
SELECT
|
||||
time_bucket('1 hour', observation_time) as bucket, --time_bucket is required
|
||||
device_id,
|
||||
avg(metric) as metric_avg, --We can use regular aggregates
|
||||
max(metric)-min(metric) as metric_spread --We can also use expressions on aggregates and constants
|
||||
FROM
|
||||
device_readings
|
||||
GROUP BY bucket, device_id; --We have to group by the bucket column, but can also add other group-by columns
|
||||
NOTICE: adding not-null constraint to column "time_partition_col"
|
||||
--Next, insert some data into the raw hypertable
|
||||
INSERT INTO device_readings
|
||||
SELECT ts, 'device_1', (EXTRACT(EPOCH FROM ts)) from generate_series('2018-12-01 00:00'::timestamp, '2018-12-31 00:00'::timestamp, '30 minutes') ts;
|
||||
INSERT INTO device_readings
|
||||
SELECT ts, 'device_2', (EXTRACT(EPOCH FROM ts)) from generate_series('2018-12-01 00:00'::timestamp, '2018-12-31 00:00'::timestamp, '30 minutes') ts;
|
||||
--Initially, it will be empty.
|
||||
SELECT * FROM device_summary;
|
||||
bucket | device_id | metric_avg | metric_spread
|
||||
--------+-----------+------------+---------------
|
||||
(0 rows)
|
||||
|
||||
--Normally, the continuous view will be updated automatically on a schedule but, you can also do it manually.
|
||||
REFRESH MATERIALIZED VIEW device_summary;
|
||||
INFO: new materialization range for public.device_readings (time column observation_time) (1546236000000000)
|
||||
INFO: materializing continuous aggregate public.device_summary: new range up to 1546236000000000
|
||||
--Now you can run selects over your view as normal
|
||||
SELECT * FROM device_summary WHERE metric_spread = 1800 ORDER BY bucket DESC, device_id LIMIT 10;
|
||||
bucket | device_id | metric_avg | metric_spread
|
||||
------------------------------+-----------+------------+---------------
|
||||
Sun Dec 30 21:00:00 2018 PST | device_1 | 1546204500 | 1800
|
||||
Sun Dec 30 21:00:00 2018 PST | device_2 | 1546204500 | 1800
|
||||
Sun Dec 30 20:00:00 2018 PST | device_1 | 1546200900 | 1800
|
||||
Sun Dec 30 20:00:00 2018 PST | device_2 | 1546200900 | 1800
|
||||
Sun Dec 30 19:00:00 2018 PST | device_1 | 1546197300 | 1800
|
||||
Sun Dec 30 19:00:00 2018 PST | device_2 | 1546197300 | 1800
|
||||
Sun Dec 30 18:00:00 2018 PST | device_1 | 1546193700 | 1800
|
||||
Sun Dec 30 18:00:00 2018 PST | device_2 | 1546193700 | 1800
|
||||
Sun Dec 30 17:00:00 2018 PST | device_1 | 1546190100 | 1800
|
||||
Sun Dec 30 17:00:00 2018 PST | device_2 | 1546190100 | 1800
|
||||
(10 rows)
|
||||
|
||||
--You can view informaton about your continuous aggregates. The meaning of these fields will be explained further down.
|
||||
\x
|
||||
SELECT * FROM timescaledb_information.continuous_aggregates;
|
||||
-[ RECORD 1 ]--------------+-------------------------------------------------------------------------------------------------------------
|
||||
view_name | device_summary
|
||||
view_owner | default_perm_user
|
||||
refresh_lag | @ 2 hours
|
||||
refresh_interval | @ 2 hours
|
||||
max_interval_per_job | @ 106751991 days 4 hours 54.775807 secs
|
||||
materialization_hypertable | _timescaledb_internal._materialized_hypertable_2
|
||||
view_definition | SELECT time_bucket('@ 1 hour'::interval, device_readings.observation_time) AS time_partition_col, +
|
||||
| device_readings.device_id, +
|
||||
| avg(device_readings.metric) AS metric_avg, +
|
||||
| (max(device_readings.metric) - min(device_readings.metric)) AS metric_spread +
|
||||
| FROM device_readings +
|
||||
| GROUP BY (time_bucket('@ 1 hour'::interval, device_readings.observation_time)), device_readings.device_id;
|
||||
|
||||
--You can also view information about your background workers.
|
||||
--Note: (some fields are empty because there are no background workers used in tests)
|
||||
SELECT * FROM timescaledb_information.continuous_aggregate_stats;
|
||||
-[ RECORD 1 ]----------+-----------------------------
|
||||
view_name | device_summary
|
||||
completed_threshold | Sun Dec 30 22:00:00 2018 PST
|
||||
invalidation_threshold | Sun Dec 30 22:00:00 2018 PST
|
||||
job_id | 1000
|
||||
last_run_started_at |
|
||||
job_status |
|
||||
last_run_duration |
|
||||
next_scheduled_run |
|
||||
|
||||
\x
|
||||
--
|
||||
-- Refresh interval
|
||||
--
|
||||
-- The refresh interval determines how often the background worker
|
||||
-- for automatic materialization will run. The default is (2 x bucket_width)
|
||||
SELECT refresh_interval FROM timescaledb_information.continuous_aggregates;
|
||||
refresh_interval
|
||||
------------------
|
||||
@ 2 hours
|
||||
(1 row)
|
||||
|
||||
-- You can change this setting with ALTER VIEW (equivalently, specify in WITH clause of CREATE VIEW)
|
||||
ALTER VIEW device_summary SET (timescaledb.refresh_interval = '1 hour');
|
||||
SELECT refresh_interval FROM timescaledb_information.continuous_aggregates;
|
||||
refresh_interval
|
||||
------------------
|
||||
@ 1 hour
|
||||
(1 row)
|
||||
|
||||
--
|
||||
-- Refresh lag
|
||||
--
|
||||
-- Materialization have a refresh lag, which means that the materialization will not contain
|
||||
-- the most up-to-date data.
|
||||
-- Namely, it will only contain data where: bucket end < (max(time)-refresh_lag)
|
||||
--By default refresh_lag is 2 x bucket_width
|
||||
SELECT refresh_lag FROM timescaledb_information.continuous_aggregates;
|
||||
refresh_lag
|
||||
-------------
|
||||
@ 2 hours
|
||||
(1 row)
|
||||
|
||||
SELECT max(observation_time) FROM device_readings;
|
||||
max
|
||||
------------------------------
|
||||
Mon Dec 31 00:00:00 2018 PST
|
||||
(1 row)
|
||||
|
||||
SELECT max(bucket) FROM device_summary;
|
||||
max
|
||||
------------------------------
|
||||
Sun Dec 30 21:00:00 2018 PST
|
||||
(1 row)
|
||||
|
||||
--You can change the refresh_lag (equivalently, specify in WITH clause of CREATE VIEW)
|
||||
--Negative values create materialization where the bucket ends after the max of the raw data.
|
||||
--So to have you data always up-to-date make the refresh_lag (-bucket_width). Note this
|
||||
--will slow down your inserts because of invalidation.
|
||||
ALTER VIEW device_summary SET (timescaledb.refresh_lag = '-1 hour');
|
||||
REFRESH MATERIALIZED VIEW device_summary;
|
||||
INFO: new materialization range for public.device_readings (time column observation_time) (1546246800000000)
|
||||
INFO: materializing continuous aggregate public.device_summary: new range up to 1546246800000000
|
||||
SELECT max(observation_time) FROM device_readings;
|
||||
max
|
||||
------------------------------
|
||||
Mon Dec 31 00:00:00 2018 PST
|
||||
(1 row)
|
||||
|
||||
SELECT max(bucket) FROM device_summary;
|
||||
max
|
||||
------------------------------
|
||||
Mon Dec 31 00:00:00 2018 PST
|
||||
(1 row)
|
||||
|
||||
--
|
||||
-- Invalidations
|
||||
--
|
||||
--Changes to the raw table, for values that have already been materialized are propagated asynchronously, after the materialization next runs.
|
||||
--Before update:
|
||||
SELECT * FROM device_summary WHERE device_id = 'device_1' and bucket = 'Sun Dec 30 13:00:00 2018 PST';
|
||||
bucket | device_id | metric_avg | metric_spread
|
||||
------------------------------+-----------+------------+---------------
|
||||
Sun Dec 30 13:00:00 2018 PST | device_1 | 1546175700 | 1800
|
||||
(1 row)
|
||||
|
||||
INSERT INTO device_readings VALUES ('Sun Dec 30 13:01:00 2018 PST', 'device_1', 1.0);
|
||||
--Change not reflected before materializer runs.
|
||||
SELECT * FROM device_summary WHERE device_id = 'device_1' and bucket = 'Sun Dec 30 13:00:00 2018 PST';
|
||||
bucket | device_id | metric_avg | metric_spread
|
||||
------------------------------+-----------+------------+---------------
|
||||
Sun Dec 30 13:00:00 2018 PST | device_1 | 1546175700 | 1800
|
||||
(1 row)
|
||||
|
||||
REFRESH MATERIALIZED VIEW device_summary;
|
||||
INFO: new materialization range not found for public.device_readings (time column observation_time): no new data
|
||||
INFO: materializing continuous aggregate public.device_summary: no new range to materialize
|
||||
--But is reflected after.
|
||||
SELECT * FROM device_summary WHERE device_id = 'device_1' and bucket = 'Sun Dec 30 13:00:00 2018 PST';
|
||||
bucket | device_id | metric_avg | metric_spread
|
||||
------------------------------+-----------+------------------+---------------
|
||||
Sun Dec 30 13:00:00 2018 PST | device_1 | 1030783800.33333 | 1546176599
|
||||
(1 row)
|
||||
|
||||
--
|
||||
-- Dealing with timezones
|
||||
--
|
||||
-- You cannot use any functions that depend on the local timezone setting inside a continuous aggregate.
|
||||
-- For example you cannot cast to the local time. This is because
|
||||
-- a timezone setting can alter from user-to-user and thus
|
||||
-- cannot be materialized.
|
||||
DROP VIEW device_summary CASCADE;
|
||||
NOTICE: drop cascades to table _timescaledb_internal._hyper_2_6_chunk
|
||||
CREATE VIEW device_summary
|
||||
WITH (timescaledb.continuous)
|
||||
AS
|
||||
SELECT
|
||||
time_bucket('1 hour', observation_time) as bucket,
|
||||
min(observation_time::timestamp) as min_time, --note the cast to localtime
|
||||
device_id,
|
||||
avg(metric) as metric_avg,
|
||||
max(metric)-min(metric) as metric_spread
|
||||
FROM
|
||||
device_readings
|
||||
GROUP BY bucket, device_id;
|
||||
ERROR: only immutable functions are supported for continuous aggregate query
|
||||
--note the error.
|
||||
-- You have two options:
|
||||
-- Option 1: be explicit in your timezone:
|
||||
DROP VIEW device_summary CASCADE;
|
||||
ERROR: view "device_summary" does not exist
|
||||
CREATE VIEW device_summary
|
||||
WITH (timescaledb.continuous)
|
||||
AS
|
||||
SELECT
|
||||
time_bucket('1 hour', observation_time) as bucket,
|
||||
min(observation_time AT TIME ZONE 'EST') as min_time, --note the explict timezone
|
||||
device_id,
|
||||
avg(metric) as metric_avg,
|
||||
max(metric)-min(metric) as metric_spread
|
||||
FROM
|
||||
device_readings
|
||||
GROUP BY bucket, device_id;
|
||||
NOTICE: adding not-null constraint to column "time_partition_col"
|
||||
DROP VIEW device_summary CASCADE;
|
||||
-- Option 2: Keep things as TIMESTAMPTZ in the view and convert to local time when
|
||||
-- querying from the view
|
||||
DROP VIEW device_summary CASCADE;
|
||||
ERROR: view "device_summary" does not exist
|
||||
CREATE VIEW device_summary
|
||||
WITH (timescaledb.continuous)
|
||||
AS
|
||||
SELECT
|
||||
time_bucket('1 hour', observation_time) as bucket,
|
||||
min(observation_time) as min_time, --this is a TIMESTAMPTZ
|
||||
device_id,
|
||||
avg(metric) as metric_avg,
|
||||
max(metric)-min(metric) as metric_spread
|
||||
FROM
|
||||
device_readings
|
||||
GROUP BY bucket, device_id;
|
||||
NOTICE: adding not-null constraint to column "time_partition_col"
|
||||
REFRESH MATERIALIZED VIEW device_summary;
|
||||
INFO: new materialization range for public.device_readings (time column observation_time) (1546236000000000)
|
||||
INFO: materializing continuous aggregate public.device_summary: new range up to 1546236000000000
|
||||
SELECT min(min_time)::timestamp FROM device_summary;
|
||||
min
|
||||
--------------------------
|
||||
Sat Dec 01 00:00:00 2018
|
||||
(1 row)
|
||||
|
@ -9,6 +9,7 @@ set(TEST_FILES
|
||||
continuous_aggs.sql
|
||||
continuous_aggs_errors.sql
|
||||
continuous_aggs_dump.sql
|
||||
continuous_aggs_usage.sql
|
||||
)
|
||||
|
||||
set(TEST_FILES_DEBUG
|
||||
|
172
tsl/test/sql/continuous_aggs_usage.sql
Normal file
172
tsl/test/sql/continuous_aggs_usage.sql
Normal file
@ -0,0 +1,172 @@
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
|
||||
-- TEST SETUP --
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
-- stop the continous aggregate background workers from interfering, nothing will be scheduled.
|
||||
-- In normal execution you wouldn't do this, but for tests we need to be reproducible regardless of what the scheduler does.
|
||||
SELECT _timescaledb_internal.stop_background_workers();
|
||||
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
|
||||
\set ON_ERROR_STOP 0
|
||||
|
||||
|
||||
-- START OF USAGE TEST --
|
||||
|
||||
--First create your hypertable
|
||||
CREATE TABLE device_readings (
|
||||
observation_time TIMESTAMPTZ NOT NULL,
|
||||
device_id TEXT NOT NULL,
|
||||
metric DOUBLE PRECISION NOT NULL,
|
||||
PRIMARY KEY(observation_time, device_id)
|
||||
);
|
||||
SELECT table_name FROM create_hypertable('device_readings', 'observation_time');
|
||||
|
||||
--Next, create your continuous aggregate view
|
||||
CREATE VIEW device_summary
|
||||
WITH (timescaledb.continuous) --This flag is what makes the view continuous
|
||||
AS
|
||||
SELECT
|
||||
time_bucket('1 hour', observation_time) as bucket, --time_bucket is required
|
||||
device_id,
|
||||
avg(metric) as metric_avg, --We can use regular aggregates
|
||||
max(metric)-min(metric) as metric_spread --We can also use expressions on aggregates and constants
|
||||
FROM
|
||||
device_readings
|
||||
GROUP BY bucket, device_id; --We have to group by the bucket column, but can also add other group-by columns
|
||||
|
||||
--Next, insert some data into the raw hypertable
|
||||
INSERT INTO device_readings
|
||||
SELECT ts, 'device_1', (EXTRACT(EPOCH FROM ts)) from generate_series('2018-12-01 00:00'::timestamp, '2018-12-31 00:00'::timestamp, '30 minutes') ts;
|
||||
INSERT INTO device_readings
|
||||
SELECT ts, 'device_2', (EXTRACT(EPOCH FROM ts)) from generate_series('2018-12-01 00:00'::timestamp, '2018-12-31 00:00'::timestamp, '30 minutes') ts;
|
||||
|
||||
--Initially, it will be empty.
|
||||
SELECT * FROM device_summary;
|
||||
|
||||
--Normally, the continuous view will be updated automatically on a schedule but, you can also do it manually.
|
||||
REFRESH MATERIALIZED VIEW device_summary;
|
||||
|
||||
--Now you can run selects over your view as normal
|
||||
SELECT * FROM device_summary WHERE metric_spread = 1800 ORDER BY bucket DESC, device_id LIMIT 10;
|
||||
|
||||
--You can view informaton about your continuous aggregates. The meaning of these fields will be explained further down.
|
||||
\x
|
||||
SELECT * FROM timescaledb_information.continuous_aggregates;
|
||||
|
||||
--You can also view information about your background workers.
|
||||
--Note: (some fields are empty because there are no background workers used in tests)
|
||||
SELECT * FROM timescaledb_information.continuous_aggregate_stats;
|
||||
\x
|
||||
|
||||
--
|
||||
-- Refresh interval
|
||||
--
|
||||
|
||||
-- The refresh interval determines how often the background worker
|
||||
-- for automatic materialization will run. The default is (2 x bucket_width)
|
||||
SELECT refresh_interval FROM timescaledb_information.continuous_aggregates;
|
||||
|
||||
-- You can change this setting with ALTER VIEW (equivalently, specify in WITH clause of CREATE VIEW)
|
||||
ALTER VIEW device_summary SET (timescaledb.refresh_interval = '1 hour');
|
||||
SELECT refresh_interval FROM timescaledb_information.continuous_aggregates;
|
||||
|
||||
|
||||
--
|
||||
-- Refresh lag
|
||||
--
|
||||
|
||||
-- Materialization have a refresh lag, which means that the materialization will not contain
|
||||
-- the most up-to-date data.
|
||||
-- Namely, it will only contain data where: bucket end < (max(time)-refresh_lag)
|
||||
|
||||
--By default refresh_lag is 2 x bucket_width
|
||||
SELECT refresh_lag FROM timescaledb_information.continuous_aggregates;
|
||||
SELECT max(observation_time) FROM device_readings;
|
||||
SELECT max(bucket) FROM device_summary;
|
||||
|
||||
--You can change the refresh_lag (equivalently, specify in WITH clause of CREATE VIEW)
|
||||
--Negative values create materialization where the bucket ends after the max of the raw data.
|
||||
--So to have you data always up-to-date make the refresh_lag (-bucket_width). Note this
|
||||
--will slow down your inserts because of invalidation.
|
||||
ALTER VIEW device_summary SET (timescaledb.refresh_lag = '-1 hour');
|
||||
REFRESH MATERIALIZED VIEW device_summary;
|
||||
SELECT max(observation_time) FROM device_readings;
|
||||
SELECT max(bucket) FROM device_summary;
|
||||
|
||||
--
|
||||
-- Invalidations
|
||||
--
|
||||
|
||||
--Changes to the raw table, for values that have already been materialized are propagated asynchronously, after the materialization next runs.
|
||||
--Before update:
|
||||
SELECT * FROM device_summary WHERE device_id = 'device_1' and bucket = 'Sun Dec 30 13:00:00 2018 PST';
|
||||
|
||||
INSERT INTO device_readings VALUES ('Sun Dec 30 13:01:00 2018 PST', 'device_1', 1.0);
|
||||
|
||||
--Change not reflected before materializer runs.
|
||||
SELECT * FROM device_summary WHERE device_id = 'device_1' and bucket = 'Sun Dec 30 13:00:00 2018 PST';
|
||||
REFRESH MATERIALIZED VIEW device_summary;
|
||||
--But is reflected after.
|
||||
SELECT * FROM device_summary WHERE device_id = 'device_1' and bucket = 'Sun Dec 30 13:00:00 2018 PST';
|
||||
|
||||
--
|
||||
-- Dealing with timezones
|
||||
--
|
||||
|
||||
-- You cannot use any functions that depend on the local timezone setting inside a continuous aggregate.
|
||||
-- For example you cannot cast to the local time. This is because
|
||||
-- a timezone setting can alter from user-to-user and thus
|
||||
-- cannot be materialized.
|
||||
|
||||
DROP VIEW device_summary CASCADE;
|
||||
CREATE VIEW device_summary
|
||||
WITH (timescaledb.continuous)
|
||||
AS
|
||||
SELECT
|
||||
time_bucket('1 hour', observation_time) as bucket,
|
||||
min(observation_time::timestamp) as min_time, --note the cast to localtime
|
||||
device_id,
|
||||
avg(metric) as metric_avg,
|
||||
max(metric)-min(metric) as metric_spread
|
||||
FROM
|
||||
device_readings
|
||||
GROUP BY bucket, device_id;
|
||||
--note the error.
|
||||
|
||||
-- You have two options:
|
||||
-- Option 1: be explicit in your timezone:
|
||||
|
||||
DROP VIEW device_summary CASCADE;
|
||||
CREATE VIEW device_summary
|
||||
WITH (timescaledb.continuous)
|
||||
AS
|
||||
SELECT
|
||||
time_bucket('1 hour', observation_time) as bucket,
|
||||
min(observation_time AT TIME ZONE 'EST') as min_time, --note the explict timezone
|
||||
device_id,
|
||||
avg(metric) as metric_avg,
|
||||
max(metric)-min(metric) as metric_spread
|
||||
FROM
|
||||
device_readings
|
||||
GROUP BY bucket, device_id;
|
||||
DROP VIEW device_summary CASCADE;
|
||||
|
||||
-- Option 2: Keep things as TIMESTAMPTZ in the view and convert to local time when
|
||||
-- querying from the view
|
||||
|
||||
DROP VIEW device_summary CASCADE;
|
||||
CREATE VIEW device_summary
|
||||
WITH (timescaledb.continuous)
|
||||
AS
|
||||
SELECT
|
||||
time_bucket('1 hour', observation_time) as bucket,
|
||||
min(observation_time) as min_time, --this is a TIMESTAMPTZ
|
||||
device_id,
|
||||
avg(metric) as metric_avg,
|
||||
max(metric)-min(metric) as metric_spread
|
||||
FROM
|
||||
device_readings
|
||||
GROUP BY bucket, device_id;
|
||||
REFRESH MATERIALIZED VIEW device_summary;
|
||||
SELECT min(min_time)::timestamp FROM device_summary;
|
Loading…
x
Reference in New Issue
Block a user