timescaledb/tsl/test/expected/cagg_refresh.out
Fabrízio de Royes Mello 38fcd1b76b Improve Realtime Continuous Aggregate performance
When calling the `cagg_watermark` function to get the watermark of a
Continuous Aggregate we execute a `SELECT MAX(time_dimension)` query
in the underlying materialization hypertable.

The problem is that a `SELECT MAX(time_dimension)` query can be
expensive because it will scan all hypertable chunks, increasing the
planning time for Realtime Continuous Aggregates.

Improved it by creating a new catalog table to serve as a cache table
to store the current Continuous Aggregate watermark in the following
situations:
- Create CAgg: store the minimum value of hypertable time dimension
  data type;
- Refresh CAgg: store the last value of the time dimension materialized
  in the underlying materialization hypertable (or the minimum value of
  materialization hypertable time dimension data type if there's no
  data materialized);
- Drop CAgg Chunks: the same as refresh cagg.

Closes #4699, #5307
2023-03-22 16:35:23 -03:00

495 lines
20 KiB
Plaintext

-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
-- Disable background workers since we are testing manual refresh
\c :TEST_DBNAME :ROLE_SUPERUSER
SELECT _timescaledb_internal.stop_background_workers();
stop_background_workers
-------------------------
t
(1 row)
SET ROLE :ROLE_DEFAULT_PERM_USER;
CREATE TABLE conditions (time timestamptz NOT NULL, device int, temp float);
SELECT create_hypertable('conditions', 'time');
create_hypertable
-------------------------
(1,public,conditions,t)
(1 row)
SELECT setseed(.12);
setseed
---------
(1 row)
INSERT INTO conditions
SELECT t, ceil(abs(timestamp_hash(t::timestamp))%4)::int, abs(timestamp_hash(t::timestamp))%40
FROM generate_series('2020-05-01', '2020-05-05', '10 minutes'::interval) t;
-- Show the most recent data
SELECT * FROM conditions
ORDER BY time DESC, device
LIMIT 10;
time | device | temp
------------------------------+--------+------
Tue May 05 00:00:00 2020 PDT | 2 | 30
Mon May 04 23:50:00 2020 PDT | 2 | 10
Mon May 04 23:40:00 2020 PDT | 0 | 20
Mon May 04 23:30:00 2020 PDT | 1 | 1
Mon May 04 23:20:00 2020 PDT | 2 | 34
Mon May 04 23:10:00 2020 PDT | 1 | 37
Mon May 04 23:00:00 2020 PDT | 0 | 4
Mon May 04 22:50:00 2020 PDT | 2 | 10
Mon May 04 22:40:00 2020 PDT | 1 | 37
Mon May 04 22:30:00 2020 PDT | 0 | 8
(10 rows)
CREATE MATERIALIZED VIEW daily_temp
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('1 day', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH NO DATA;
-- The continuous aggregate should be empty
SELECT * FROM daily_temp
ORDER BY day DESC, device;
day | device | avg_temp
-----+--------+----------
(0 rows)
-- Refresh one bucket (1 day):
SHOW timezone;
TimeZone
----------
PST8PDT
(1 row)
-- The refresh of a single bucket must align with the start of the day
-- in the bucket's time zone (which is UTC, since time_bucket doesn't
-- support time zone arg)
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03 00:00 UTC', '2020-05-04 00:00 UTC');
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03 17:00 PDT', '2020-05-04 17:00 PDT');
\set ON_ERROR_STOP 0
\set VERBOSITY default
-- These refreshes will fail since they don't align with the bucket's
-- time zone
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03', '2020-05-04');
ERROR: refresh window too small
DETAIL: The refresh window must cover at least one bucket of data.
HINT: Align the refresh window with the bucket time zone or use at least two buckets.
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03 00:00 PDT', '2020-05-04 00:00 PDT');
ERROR: refresh window too small
DETAIL: The refresh window must cover at least one bucket of data.
HINT: Align the refresh window with the bucket time zone or use at least two buckets.
-- Refresh window less than one bucket
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03 00:00 UTC', '2020-05-03 23:59 UTC');
ERROR: refresh window too small
DETAIL: The refresh window must cover at least one bucket of data.
HINT: Align the refresh window with the bucket time zone or use at least two buckets.
-- Refresh window bigger than one bucket, but failing since it is not
-- aligned with bucket boundaries so that it covers a full bucket:
--
-- Refresh window: [----------)
-- Buckets: [------|------]
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03 01:00 UTC', '2020-05-04 08:00 UTC');
ERROR: refresh window too small
DETAIL: The refresh window must cover at least one bucket of data.
HINT: Align the refresh window with the bucket time zone or use at least two buckets.
\set VERBOSITY terse
\set ON_ERROR_STOP 1
-- Refresh the most recent few days:
CALL refresh_continuous_aggregate('daily_temp', '2020-05-02', '2020-05-05 17:00');
SELECT * FROM daily_temp
ORDER BY day DESC, device;
day | device | avg_temp
------------------------------+--------+------------------
Mon May 04 17:00:00 2020 PDT | 0 | 19.3846153846154
Mon May 04 17:00:00 2020 PDT | 1 | 16.5555555555556
Mon May 04 17:00:00 2020 PDT | 2 | 18.5714285714286
Mon May 04 17:00:00 2020 PDT | 3 | 23.5714285714286
Sun May 03 17:00:00 2020 PDT | 0 | 15.7647058823529
Sun May 03 17:00:00 2020 PDT | 1 | 24.3142857142857
Sun May 03 17:00:00 2020 PDT | 2 | 14.8205128205128
Sun May 03 17:00:00 2020 PDT | 3 | 18.1111111111111
Sat May 02 17:00:00 2020 PDT | 0 | 17
Sat May 02 17:00:00 2020 PDT | 1 | 18.75
Sat May 02 17:00:00 2020 PDT | 2 | 20
Sat May 02 17:00:00 2020 PDT | 3 | 21.5217391304348
(12 rows)
-- Refresh the rest (and try DEBUG output)
SET client_min_messages TO DEBUG1;
CALL refresh_continuous_aggregate('daily_temp', '2020-04-30', '2020-05-04');
LOG: statement: CALL refresh_continuous_aggregate('daily_temp', '2020-04-30', '2020-05-04');
DEBUG: refreshing continuous aggregate "daily_temp" in window [ Thu Apr 30 17:00:00 2020 PDT, Sun May 03 17:00:00 2020 PDT ]
DEBUG: hypertable 1 existing watermark >= new invalidation threshold 1588723200000000 1588550400000000
DEBUG: invalidation refresh on "daily_temp" in window [ Thu Apr 30 17:00:00 2020 PDT, Sat May 02 17:00:00 2020 PDT ]
DEBUG: hypertable 2 existing watermark >= new watermark 1588723200000000 1588464000000000
RESET client_min_messages;
LOG: statement: RESET client_min_messages;
-- Compare the aggregate to the equivalent query on the source table
SELECT * FROM daily_temp
ORDER BY day DESC, device;
day | device | avg_temp
------------------------------+--------+------------------
Mon May 04 17:00:00 2020 PDT | 0 | 19.3846153846154
Mon May 04 17:00:00 2020 PDT | 1 | 16.5555555555556
Mon May 04 17:00:00 2020 PDT | 2 | 18.5714285714286
Mon May 04 17:00:00 2020 PDT | 3 | 23.5714285714286
Sun May 03 17:00:00 2020 PDT | 0 | 15.7647058823529
Sun May 03 17:00:00 2020 PDT | 1 | 24.3142857142857
Sun May 03 17:00:00 2020 PDT | 2 | 14.8205128205128
Sun May 03 17:00:00 2020 PDT | 3 | 18.1111111111111
Sat May 02 17:00:00 2020 PDT | 0 | 17
Sat May 02 17:00:00 2020 PDT | 1 | 18.75
Sat May 02 17:00:00 2020 PDT | 2 | 20
Sat May 02 17:00:00 2020 PDT | 3 | 21.5217391304348
Fri May 01 17:00:00 2020 PDT | 0 | 19
Fri May 01 17:00:00 2020 PDT | 1 | 15.1463414634146
Fri May 01 17:00:00 2020 PDT | 2 | 19.7674418604651
Fri May 01 17:00:00 2020 PDT | 3 | 22.25
Thu Apr 30 17:00:00 2020 PDT | 0 | 17.6666666666667
Thu Apr 30 17:00:00 2020 PDT | 1 | 18.8333333333333
Thu Apr 30 17:00:00 2020 PDT | 2 | 16.7586206896552
Thu Apr 30 17:00:00 2020 PDT | 3 | 20.76
(20 rows)
SELECT time_bucket('1 day', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2
ORDER BY 1 DESC,2;
day | device | avg_temp
------------------------------+--------+------------------
Mon May 04 17:00:00 2020 PDT | 0 | 19.3846153846154
Mon May 04 17:00:00 2020 PDT | 1 | 16.5555555555556
Mon May 04 17:00:00 2020 PDT | 2 | 18.5714285714286
Mon May 04 17:00:00 2020 PDT | 3 | 23.5714285714286
Sun May 03 17:00:00 2020 PDT | 0 | 15.7647058823529
Sun May 03 17:00:00 2020 PDT | 1 | 24.3142857142857
Sun May 03 17:00:00 2020 PDT | 2 | 14.8205128205128
Sun May 03 17:00:00 2020 PDT | 3 | 18.1111111111111
Sat May 02 17:00:00 2020 PDT | 0 | 17
Sat May 02 17:00:00 2020 PDT | 1 | 18.75
Sat May 02 17:00:00 2020 PDT | 2 | 20
Sat May 02 17:00:00 2020 PDT | 3 | 21.5217391304348
Fri May 01 17:00:00 2020 PDT | 0 | 19
Fri May 01 17:00:00 2020 PDT | 1 | 15.1463414634146
Fri May 01 17:00:00 2020 PDT | 2 | 19.7674418604651
Fri May 01 17:00:00 2020 PDT | 3 | 22.25
Thu Apr 30 17:00:00 2020 PDT | 0 | 17.6666666666667
Thu Apr 30 17:00:00 2020 PDT | 1 | 18.8333333333333
Thu Apr 30 17:00:00 2020 PDT | 2 | 16.7586206896552
Thu Apr 30 17:00:00 2020 PDT | 3 | 20.76
(20 rows)
-- Test unusual, but valid input
CALL refresh_continuous_aggregate('daily_temp', '2020-05-01'::timestamptz, '2020-05-03'::date);
NOTICE: continuous aggregate "daily_temp" is already up-to-date
CALL refresh_continuous_aggregate('daily_temp', '2020-05-01'::date, '2020-05-03'::date);
NOTICE: continuous aggregate "daily_temp" is already up-to-date
-- Unbounded window forward in time
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03', NULL);
NOTICE: continuous aggregate "daily_temp" is already up-to-date
CALL refresh_continuous_aggregate('daily_temp', NULL, NULL);
-- Unbounded window back in time
CALL refresh_continuous_aggregate('daily_temp', NULL, '2020-05-01');
NOTICE: continuous aggregate "daily_temp" is already up-to-date
-- Test bad input
\set ON_ERROR_STOP 0
-- Bad continuous aggregate name
CALL refresh_continuous_aggregate(NULL, '2020-05-03', '2020-05-05');
ERROR: invalid continuous aggregate
CALL refresh_continuous_aggregate('xyz', '2020-05-03', '2020-05-05');
ERROR: relation "xyz" does not exist at character 35
-- Valid object, but not a continuous aggregate
CALL refresh_continuous_aggregate('conditions', '2020-05-03', '2020-05-05');
ERROR: relation "conditions" is not a continuous aggregate
-- Object ID with no object
CALL refresh_continuous_aggregate(1, '2020-05-03', '2020-05-05');
ERROR: continuous aggregate does not exist
-- Lacking arguments
CALL refresh_continuous_aggregate('daily_temp');
ERROR: procedure refresh_continuous_aggregate(unknown) does not exist at character 6
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03');
ERROR: procedure refresh_continuous_aggregate(unknown, unknown) does not exist at character 6
-- Bad time ranges
CALL refresh_continuous_aggregate('daily_temp', 'xyz', '2020-05-05');
ERROR: invalid input syntax for type timestamp with time zone: "xyz"
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03', 'xyz');
ERROR: invalid input syntax for type timestamp with time zone: "xyz"
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03', '2020-05-01');
ERROR: refresh window too small
-- Bad time input
CALL refresh_continuous_aggregate('daily_temp', '2020-05-01'::text, '2020-05-03'::text);
ERROR: invalid time argument type "text"
CALL refresh_continuous_aggregate('daily_temp', 0, '2020-05-01');
ERROR: invalid time argument type "integer"
\set ON_ERROR_STOP 1
-- Test different time types
CREATE TABLE conditions_date (time date NOT NULL, device int, temp float);
SELECT create_hypertable('conditions_date', 'time');
create_hypertable
------------------------------
(3,public,conditions_date,t)
(1 row)
CREATE MATERIALIZED VIEW daily_temp_date
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('1 day', time) AS day, device, avg(temp) AS avg_temp
FROM conditions_date
GROUP BY 1,2 WITH NO DATA;
CALL refresh_continuous_aggregate('daily_temp_date', '2020-05-01', '2020-05-03');
-- Try max refresh window size
CALL refresh_continuous_aggregate('daily_temp_date', NULL, NULL);
-- Test smallint-based continuous aggregate
CREATE TABLE conditions_smallint (time smallint NOT NULL, device int, temp float);
SELECT create_hypertable('conditions_smallint', 'time', chunk_time_interval => 20);
create_hypertable
----------------------------------
(5,public,conditions_smallint,t)
(1 row)
INSERT INTO conditions_smallint
SELECT t, ceil(abs(timestamp_hash(to_timestamp(t)::timestamp))%4)::smallint, abs(timestamp_hash(to_timestamp(t)::timestamp))%40
FROM generate_series(1, 100, 1) t;
-- "Now" function for the smallint-based hypertable: returns the largest
-- time value currently stored in conditions_smallint, or 0 when the
-- table has no rows (max() is NULL then, and coalesce substitutes 0).
-- The cast brings the result back to the declared smallint return type.
-- Declared STABLE; it is registered for conditions_smallint further
-- down via set_integer_now_func().
CREATE OR REPLACE FUNCTION smallint_now()
RETURNS smallint LANGUAGE SQL STABLE AS
$$
SELECT coalesce(max(time), 0)::smallint
FROM conditions_smallint
$$;
\set ON_ERROR_STOP 0
-- First try to create an integer-based continuous aggregate without
-- a now function. This should not be allowed.
CREATE MATERIALIZED VIEW cond_20_smallint
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket(SMALLINT '20', time) AS bucket, device, avg(temp) AS avg_temp
FROM conditions_smallint c
GROUP BY 1,2 WITH NO DATA;
ERROR: custom time function required on hypertable "conditions_smallint"
\set ON_ERROR_STOP 1
SELECT set_integer_now_func('conditions_smallint', 'smallint_now');
set_integer_now_func
----------------------
(1 row)
CREATE MATERIALIZED VIEW cond_20_smallint
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket(SMALLINT '20', time) AS bucket, device, avg(temp) AS avg_temp
FROM conditions_smallint c
GROUP BY 1,2 WITH NO DATA;
CALL refresh_continuous_aggregate('cond_20_smallint', 0::smallint, 70::smallint);
SELECT * FROM cond_20_smallint
ORDER BY 1,2;
bucket | device | avg_temp
--------+--------+------------------
0 | 0 | 6
0 | 1 | 19
0 | 2 | 14.5
0 | 3 | 21.4
20 | 0 | 15
20 | 1 | 16
20 | 2 | 23.3333333333333
20 | 3 | 13.6666666666667
40 | 0 | 21
40 | 1 | 19.4
40 | 2 | 22
40 | 3 | 21.4
(12 rows)
-- Try max refresh window size
CALL refresh_continuous_aggregate('cond_20_smallint', NULL, NULL);
-- Test int-based continuous aggregate
CREATE TABLE conditions_int (time int NOT NULL, device int, temp float);
SELECT create_hypertable('conditions_int', 'time', chunk_time_interval => 20);
create_hypertable
-----------------------------
(7,public,conditions_int,t)
(1 row)
INSERT INTO conditions_int
SELECT t, ceil(abs(timestamp_hash(to_timestamp(t)::timestamp))%4)::int, abs(timestamp_hash(to_timestamp(t)::timestamp))%40
FROM generate_series(1, 100, 1) t;
-- "Now" function for the int-based hypertable: returns the largest
-- time value currently stored in conditions_int, or 0 when the table
-- has no rows (max() is NULL then, and coalesce substitutes 0).
-- Declared STABLE; it is registered for conditions_int right after
-- this definition via set_integer_now_func().
CREATE OR REPLACE FUNCTION int_now()
RETURNS int LANGUAGE SQL STABLE AS
$$
SELECT coalesce(max(time), 0)
FROM conditions_int
$$;
SELECT set_integer_now_func('conditions_int', 'int_now');
set_integer_now_func
----------------------
(1 row)
CREATE MATERIALIZED VIEW cond_20_int
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket(INT '20', time) AS bucket, device, avg(temp) AS avg_temp
FROM conditions_int
GROUP BY 1,2 WITH NO DATA;
CALL refresh_continuous_aggregate('cond_20_int', 0, 65);
SELECT * FROM cond_20_int
ORDER BY 1,2;
bucket | device | avg_temp
--------+--------+------------------
0 | 0 | 6
0 | 1 | 19
0 | 2 | 14.5
0 | 3 | 21.4
20 | 0 | 15
20 | 1 | 16
20 | 2 | 23.3333333333333
20 | 3 | 13.6666666666667
40 | 0 | 21
40 | 1 | 19.4
40 | 2 | 22
40 | 3 | 21.4
(12 rows)
-- Try max refresh window size
CALL refresh_continuous_aggregate('cond_20_int', NULL, NULL);
-- Test bigint-based continuous aggregate
CREATE TABLE conditions_bigint (time bigint NOT NULL, device int, temp float);
SELECT create_hypertable('conditions_bigint', 'time', chunk_time_interval => 20);
create_hypertable
--------------------------------
(9,public,conditions_bigint,t)
(1 row)
INSERT INTO conditions_bigint
SELECT t, ceil(abs(timestamp_hash(to_timestamp(t)::timestamp))%4)::bigint, abs(timestamp_hash(to_timestamp(t)::timestamp))%40
FROM generate_series(1, 100, 1) t;
-- "Now" function for the bigint-based hypertable: returns the largest
-- time value currently stored in conditions_bigint, or 0 when the
-- table has no rows (max() is NULL then, and coalesce substitutes 0).
-- The result is explicitly cast to the declared bigint return type.
-- Declared STABLE; it is registered for conditions_bigint right after
-- this definition via set_integer_now_func().
CREATE OR REPLACE FUNCTION bigint_now()
RETURNS bigint LANGUAGE SQL STABLE AS
$$
SELECT coalesce(max(time), 0)::bigint
FROM conditions_bigint
$$;
SELECT set_integer_now_func('conditions_bigint', 'bigint_now');
set_integer_now_func
----------------------
(1 row)
CREATE MATERIALIZED VIEW cond_20_bigint
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket(BIGINT '20', time) AS bucket, device, avg(temp) AS avg_temp
FROM conditions_bigint
GROUP BY 1,2 WITH NO DATA;
CALL refresh_continuous_aggregate('cond_20_bigint', 0, 75);
SELECT * FROM cond_20_bigint
ORDER BY 1,2;
bucket | device | avg_temp
--------+--------+------------------
0 | 0 | 6
0 | 1 | 19
0 | 2 | 14.5
0 | 3 | 21.4
20 | 0 | 15
20 | 1 | 16
20 | 2 | 23.3333333333333
20 | 3 | 13.6666666666667
40 | 0 | 21
40 | 1 | 19.4
40 | 2 | 22
40 | 3 | 21.4
(12 rows)
-- Try max refresh window size
CALL refresh_continuous_aggregate('cond_20_bigint', NULL, NULL);
-- Test that WITH NO DATA and WITH DATA work (we use whatever is the
-- default for Postgres, so we do not need to have a test for the
-- default).
CREATE MATERIALIZED VIEW weekly_temp_without_data
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('7 days', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH NO DATA;
CREATE MATERIALIZED VIEW weekly_temp_with_data
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('7 days', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH DATA;
NOTICE: refreshing continuous aggregate "weekly_temp_with_data"
SELECT * FROM weekly_temp_without_data;
day | device | avg_temp
-----+--------+----------
(0 rows)
SELECT * FROM weekly_temp_with_data ORDER BY 1,2;
day | device | avg_temp
------------------------------+--------+------------------
Sun Apr 26 17:00:00 2020 PDT | 0 | 17.8181818181818
Sun Apr 26 17:00:00 2020 PDT | 1 | 17.2474226804124
Sun Apr 26 17:00:00 2020 PDT | 2 | 18.9803921568627
Sun Apr 26 17:00:00 2020 PDT | 3 | 21.5631067961165
Sun May 03 17:00:00 2020 PDT | 0 | 16.7659574468085
Sun May 03 17:00:00 2020 PDT | 1 | 22.7272727272727
Sun May 03 17:00:00 2020 PDT | 2 | 15.811320754717
Sun May 03 17:00:00 2020 PDT | 3 | 19
(8 rows)
\set ON_ERROR_STOP 0
-- REFRESH MATERIALIZED VIEW is blocked on continuous aggregates
REFRESH MATERIALIZED VIEW weekly_temp_without_data;
ERROR: operation not supported on continuous aggregate
-- These should fail since we do not allow refreshing inside a
-- transaction, not even as part of CREATE MATERIALIZED VIEW.
DO LANGUAGE PLPGSQL $$ BEGIN
CREATE MATERIALIZED VIEW weekly_conditions
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('7 days', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH DATA;
END $$;
ERROR: CREATE MATERIALIZED VIEW ... WITH DATA cannot be executed from a function
BEGIN;
CREATE MATERIALIZED VIEW weekly_conditions
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('7 days', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH DATA;
ERROR: CREATE MATERIALIZED VIEW ... WITH DATA cannot run inside a transaction block
COMMIT;
\set ON_ERROR_STOP 1
-- This should not fail since we do not refresh the continuous
-- aggregate.
DO LANGUAGE PLPGSQL $$ BEGIN
CREATE MATERIALIZED VIEW weekly_conditions_1
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('7 days', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH NO DATA;
END $$;
BEGIN;
CREATE MATERIALIZED VIEW weekly_conditions_2
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('7 days', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH NO DATA;
COMMIT;