timescaledb/tsl/test/expected/cagg_refresh_using_merge.out
Fabrízio de Royes Mello 8a32f91036 Avoid unnecessary DELETE on materialization
If there are no rows in the materialization hypertable to be updated in
the specific window range, then fall back to INSERTing the rows.

This improves the performance of the materialization step of the CAgg
refresh and also avoids unnecessary DELETEs.
2024-10-15 16:30:25 -03:00
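
As a rough illustration only (this is a sketch with placeholder names, not the
statement TimescaleDB builds internally): with MERGE enabled, a refresh
conceptually picks one of two shapes, where mat_ht, cagg_query, bucket and val
are hypothetical stand-ins for the materialization hypertable, the bounded
aggregate query, and its columns.

    -- MERGE path: update buckets already materialized in the window and
    -- insert new ones; a follow-up DELETE (logged separately below) removes
    -- buckets that disappeared from the recomputed aggregate.
    MERGE INTO mat_ht AS m
    USING cagg_query AS n ON (m.bucket = n.bucket)
    WHEN MATCHED THEN UPDATE SET val = n.val
    WHEN NOT MATCHED THEN INSERT (bucket, val) VALUES (n.bucket, n.val);

    -- INSERT fallback: the window holds no previously materialized rows,
    -- so the MERGE and the follow-up DELETE are skipped entirely.
    INSERT INTO mat_ht (bucket, val)
    SELECT bucket, val FROM cagg_query;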

-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
-- Enable MERGE statements for continuous aggregate refresh
SET timescaledb.enable_merge_on_cagg_refresh TO ON;
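-- (Illustration, not part of the original test: the current value of this GUC
-- can be inspected with standard PostgreSQL tooling, e.g.
--   SHOW timescaledb.enable_merge_on_cagg_refresh;
-- or SELECT current_setting('timescaledb.enable_merge_on_cagg_refresh');
-- and it can also be applied per transaction with SET LOCAL.)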
SET timezone TO PST8PDT;
\ir include/cagg_refresh_common.sql
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
CREATE TABLE conditions (time timestamptz NOT NULL, device int, temp float);
SELECT create_hypertable('conditions', 'time');
create_hypertable
-------------------------
(1,public,conditions,t)
(1 row)
SELECT setseed(.12);
setseed
---------
(1 row)
INSERT INTO conditions
SELECT t, ceil(abs(timestamp_hash(t::timestamp))%4)::int, abs(timestamp_hash(t::timestamp))%40
FROM generate_series('2020-05-01', '2020-05-05', '10 minutes'::interval) t;
-- Show the most recent data
SELECT * FROM conditions
ORDER BY time DESC, device
LIMIT 10;
time | device | temp
------------------------------+--------+------
Tue May 05 00:00:00 2020 PDT | 2 | 30
Mon May 04 23:50:00 2020 PDT | 2 | 10
Mon May 04 23:40:00 2020 PDT | 0 | 20
Mon May 04 23:30:00 2020 PDT | 1 | 1
Mon May 04 23:20:00 2020 PDT | 2 | 34
Mon May 04 23:10:00 2020 PDT | 1 | 37
Mon May 04 23:00:00 2020 PDT | 0 | 4
Mon May 04 22:50:00 2020 PDT | 2 | 10
Mon May 04 22:40:00 2020 PDT | 1 | 37
Mon May 04 22:30:00 2020 PDT | 0 | 8
(10 rows)
CREATE MATERIALIZED VIEW daily_temp
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('1 day', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH NO DATA;
-- The continuous aggregate should be empty
SELECT * FROM daily_temp
ORDER BY day DESC, device;
day | device | avg_temp
-----+--------+----------
(0 rows)
-- Refresh one bucket (1 day):
SHOW timezone;
TimeZone
----------
PST8PDT
(1 row)
-- The refresh of a single bucket must align with the start of the day
-- in the bucket's time zone (which is UTC, since time_bucket doesn't
-- support a time zone argument)
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03 00:00 UTC', '2020-05-04 00:00 UTC');
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03 17:00 PDT', '2020-05-04 17:00 PDT');
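-- For illustration (not part of the original test output): without a time zone
-- argument, a 1 day bucket starts at UTC midnight, which this PST8PDT session
-- displays as 17:00 of the previous day. For example,
--   SELECT time_bucket('1 day', '2020-05-03 01:00+00'::timestamptz);
-- should return the bucket start 2020-05-03 00:00 UTC (shown as
-- Sat May 02 17:00:00 2020 PDT), which is why the two calls above line up with
-- UTC midnights while the misaligned ones below fail.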
\set ON_ERROR_STOP 0
\set VERBOSITY default
-- These refreshes will fail since they aren't aligned with bucket boundaries
-- in the bucket's time zone
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03', '2020-05-04');
psql:include/cagg_refresh_common.sql:43: ERROR: refresh window too small
DETAIL: The refresh window must cover at least one bucket of data.
HINT: Align the refresh window with the bucket time zone or use at least two buckets.
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03 00:00 PDT', '2020-05-04 00:00 PDT');
psql:include/cagg_refresh_common.sql:44: ERROR: refresh window too small
DETAIL: The refresh window must cover at least one bucket of data.
HINT: Align the refresh window with the bucket time zone or use at least two buckets.
-- Refresh window less than one bucket
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03 00:00 UTC', '2020-05-03 23:59 UTC');
psql:include/cagg_refresh_common.sql:47: ERROR: refresh window too small
DETAIL: The refresh window must cover at least one bucket of data.
HINT: Align the refresh window with the bucket time zone or use at least two buckets.
-- Refresh window bigger than one bucket, but it fails since it is not
-- aligned with bucket boundaries and therefore does not cover a full bucket:
--
-- Refresh window: [----------)
-- Buckets: [------|------]
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03 01:00 UTC', '2020-05-04 08:00 UTC');
psql:include/cagg_refresh_common.sql:53: ERROR: refresh window too small
DETAIL: The refresh window must cover at least one bucket of data.
HINT: Align the refresh window with the bucket time zone or use at least two buckets.
\set VERBOSITY terse
\set ON_ERROR_STOP 1
-- Refresh the most recent few days:
CALL refresh_continuous_aggregate('daily_temp', '2020-05-02', '2020-05-05 17:00');
SELECT * FROM daily_temp
ORDER BY day DESC, device;
day | device | avg_temp
------------------------------+--------+------------------
Mon May 04 17:00:00 2020 PDT | 0 | 19.3846153846154
Mon May 04 17:00:00 2020 PDT | 1 | 16.5555555555556
Mon May 04 17:00:00 2020 PDT | 2 | 18.5714285714286
Mon May 04 17:00:00 2020 PDT | 3 | 23.5714285714286
Sun May 03 17:00:00 2020 PDT | 0 | 15.7647058823529
Sun May 03 17:00:00 2020 PDT | 1 | 24.3142857142857
Sun May 03 17:00:00 2020 PDT | 2 | 14.8205128205128
Sun May 03 17:00:00 2020 PDT | 3 | 18.1111111111111
Sat May 02 17:00:00 2020 PDT | 0 | 17
Sat May 02 17:00:00 2020 PDT | 1 | 18.75
Sat May 02 17:00:00 2020 PDT | 2 | 20
Sat May 02 17:00:00 2020 PDT | 3 | 21.5217391304348
(12 rows)
-- Refresh the rest (and try DEBUG output)
SET client_min_messages TO DEBUG1;
CALL refresh_continuous_aggregate('daily_temp', '2020-04-30', '2020-05-04');
psql:include/cagg_refresh_common.sql:65: LOG: statement: CALL refresh_continuous_aggregate('daily_temp', '2020-04-30', '2020-05-04');
psql:include/cagg_refresh_common.sql:65: DEBUG: hypertable 1 existing watermark >= new invalidation threshold 1588723200000000 1588550400000000
psql:include/cagg_refresh_common.sql:65: DEBUG: continuous aggregate refresh (individual invalidation) on "daily_temp" in window [ Thu Apr 30 17:00:00 2020 PDT, Sat May 02 17:00:00 2020 PDT ]
psql:include/cagg_refresh_common.sql:65: LOG: inserted 8 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2"
psql:include/cagg_refresh_common.sql:65: DEBUG: hypertable 2 existing watermark >= new watermark 1588723200000000 1588723200000000
RESET client_min_messages;
psql:include/cagg_refresh_common.sql:66: LOG: statement: RESET client_min_messages;
-- Compare the aggregate to the equivalent query on the source table
SELECT * FROM daily_temp
ORDER BY day DESC, device;
day | device | avg_temp
------------------------------+--------+------------------
Mon May 04 17:00:00 2020 PDT | 0 | 19.3846153846154
Mon May 04 17:00:00 2020 PDT | 1 | 16.5555555555556
Mon May 04 17:00:00 2020 PDT | 2 | 18.5714285714286
Mon May 04 17:00:00 2020 PDT | 3 | 23.5714285714286
Sun May 03 17:00:00 2020 PDT | 0 | 15.7647058823529
Sun May 03 17:00:00 2020 PDT | 1 | 24.3142857142857
Sun May 03 17:00:00 2020 PDT | 2 | 14.8205128205128
Sun May 03 17:00:00 2020 PDT | 3 | 18.1111111111111
Sat May 02 17:00:00 2020 PDT | 0 | 17
Sat May 02 17:00:00 2020 PDT | 1 | 18.75
Sat May 02 17:00:00 2020 PDT | 2 | 20
Sat May 02 17:00:00 2020 PDT | 3 | 21.5217391304348
Fri May 01 17:00:00 2020 PDT | 0 | 19
Fri May 01 17:00:00 2020 PDT | 1 | 15.1463414634146
Fri May 01 17:00:00 2020 PDT | 2 | 19.7674418604651
Fri May 01 17:00:00 2020 PDT | 3 | 22.25
Thu Apr 30 17:00:00 2020 PDT | 0 | 17.6666666666667
Thu Apr 30 17:00:00 2020 PDT | 1 | 18.8333333333333
Thu Apr 30 17:00:00 2020 PDT | 2 | 16.7586206896552
Thu Apr 30 17:00:00 2020 PDT | 3 | 20.76
(20 rows)
SELECT time_bucket('1 day', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2
ORDER BY 1 DESC,2;
day | device | avg_temp
------------------------------+--------+------------------
Mon May 04 17:00:00 2020 PDT | 0 | 19.3846153846154
Mon May 04 17:00:00 2020 PDT | 1 | 16.5555555555556
Mon May 04 17:00:00 2020 PDT | 2 | 18.5714285714286
Mon May 04 17:00:00 2020 PDT | 3 | 23.5714285714286
Sun May 03 17:00:00 2020 PDT | 0 | 15.7647058823529
Sun May 03 17:00:00 2020 PDT | 1 | 24.3142857142857
Sun May 03 17:00:00 2020 PDT | 2 | 14.8205128205128
Sun May 03 17:00:00 2020 PDT | 3 | 18.1111111111111
Sat May 02 17:00:00 2020 PDT | 0 | 17
Sat May 02 17:00:00 2020 PDT | 1 | 18.75
Sat May 02 17:00:00 2020 PDT | 2 | 20
Sat May 02 17:00:00 2020 PDT | 3 | 21.5217391304348
Fri May 01 17:00:00 2020 PDT | 0 | 19
Fri May 01 17:00:00 2020 PDT | 1 | 15.1463414634146
Fri May 01 17:00:00 2020 PDT | 2 | 19.7674418604651
Fri May 01 17:00:00 2020 PDT | 3 | 22.25
Thu Apr 30 17:00:00 2020 PDT | 0 | 17.6666666666667
Thu Apr 30 17:00:00 2020 PDT | 1 | 18.8333333333333
Thu Apr 30 17:00:00 2020 PDT | 2 | 16.7586206896552
Thu Apr 30 17:00:00 2020 PDT | 3 | 20.76
(20 rows)
-- Test unusual, but valid input
CALL refresh_continuous_aggregate('daily_temp', '2020-05-01'::timestamptz, '2020-05-03'::date);
psql:include/cagg_refresh_common.sql:78: NOTICE: continuous aggregate "daily_temp" is already up-to-date
CALL refresh_continuous_aggregate('daily_temp', '2020-05-01'::date, '2020-05-03'::date);
psql:include/cagg_refresh_common.sql:79: NOTICE: continuous aggregate "daily_temp" is already up-to-date
-- Unbounded window forward in time
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03', NULL);
psql:include/cagg_refresh_common.sql:82: NOTICE: continuous aggregate "daily_temp" is already up-to-date
CALL refresh_continuous_aggregate('daily_temp', NULL, NULL);
-- Unbounded window back in time
CALL refresh_continuous_aggregate('daily_temp', NULL, '2020-05-01');
psql:include/cagg_refresh_common.sql:86: NOTICE: continuous aggregate "daily_temp" is already up-to-date
-- Test bad input
\set ON_ERROR_STOP 0
-- Bad continuous aggregate name
CALL refresh_continuous_aggregate(NULL, '2020-05-03', '2020-05-05');
psql:include/cagg_refresh_common.sql:91: ERROR: invalid continuous aggregate
CALL refresh_continuous_aggregate('xyz', '2020-05-03', '2020-05-05');
psql:include/cagg_refresh_common.sql:92: ERROR: relation "xyz" does not exist at character 35
-- Valid object, but not a continuous aggregate
CALL refresh_continuous_aggregate('conditions', '2020-05-03', '2020-05-05');
psql:include/cagg_refresh_common.sql:94: ERROR: relation "conditions" is not a continuous aggregate
-- Object ID with no object
CALL refresh_continuous_aggregate(1, '2020-05-03', '2020-05-05');
psql:include/cagg_refresh_common.sql:96: ERROR: continuous aggregate does not exist
-- Lacking arguments
CALL refresh_continuous_aggregate('daily_temp');
psql:include/cagg_refresh_common.sql:98: ERROR: procedure refresh_continuous_aggregate(unknown) does not exist at character 6
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03');
psql:include/cagg_refresh_common.sql:99: ERROR: procedure refresh_continuous_aggregate(unknown, unknown) does not exist at character 6
-- Bad time ranges
CALL refresh_continuous_aggregate('daily_temp', 'xyz', '2020-05-05');
psql:include/cagg_refresh_common.sql:101: ERROR: invalid input syntax for type timestamp with time zone: "xyz"
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03', 'xyz');
psql:include/cagg_refresh_common.sql:102: ERROR: invalid input syntax for type timestamp with time zone: "xyz"
CALL refresh_continuous_aggregate('daily_temp', '2020-05-03', '2020-05-01');
psql:include/cagg_refresh_common.sql:103: ERROR: refresh window too small
-- Bad time input
CALL refresh_continuous_aggregate('daily_temp', '2020-05-01'::text, '2020-05-03'::text);
psql:include/cagg_refresh_common.sql:105: ERROR: invalid time argument type "text"
CALL refresh_continuous_aggregate('daily_temp', 0, '2020-05-01');
psql:include/cagg_refresh_common.sql:106: ERROR: invalid time argument type "integer"
\set ON_ERROR_STOP 1
-- Test different time types
CREATE TABLE conditions_date (time date NOT NULL, device int, temp float);
SELECT create_hypertable('conditions_date', 'time');
create_hypertable
------------------------------
(3,public,conditions_date,t)
(1 row)
CREATE MATERIALIZED VIEW daily_temp_date
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('1 day', time) AS day, device, avg(temp) AS avg_temp
FROM conditions_date
GROUP BY 1,2 WITH NO DATA;
CALL refresh_continuous_aggregate('daily_temp_date', '2020-05-01', '2020-05-03');
-- Try max refresh window size
CALL refresh_continuous_aggregate('daily_temp_date', NULL, NULL);
-- Test smallint-based continuous aggregate
CREATE TABLE conditions_smallint (time smallint NOT NULL, device int, temp float);
SELECT create_hypertable('conditions_smallint', 'time', chunk_time_interval => 20);
create_hypertable
----------------------------------
(5,public,conditions_smallint,t)
(1 row)
INSERT INTO conditions_smallint
SELECT t, ceil(abs(timestamp_hash(to_timestamp(t)::timestamp))%4)::smallint, abs(timestamp_hash(to_timestamp(t)::timestamp))%40
FROM generate_series(1, 100, 1) t;
CREATE OR REPLACE FUNCTION smallint_now()
RETURNS smallint LANGUAGE SQL STABLE AS
$$
SELECT coalesce(max(time), 0)::smallint
FROM conditions_smallint
$$;
\set ON_ERROR_STOP 0
-- First try to create an integer-based continuous aggregate without
-- a now function. This should not be allowed.
CREATE MATERIALIZED VIEW cond_20_smallint
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket(SMALLINT '20', time) AS bucket, device, avg(temp) AS avg_temp
FROM conditions_smallint c
GROUP BY 1,2 WITH NO DATA;
psql:include/cagg_refresh_common.sql:150: ERROR: custom time function required on hypertable "conditions_smallint"
\set ON_ERROR_STOP 1
SELECT set_integer_now_func('conditions_smallint', 'smallint_now');
set_integer_now_func
----------------------
(1 row)
CREATE MATERIALIZED VIEW cond_20_smallint
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket(SMALLINT '20', time) AS bucket, device, avg(temp) AS avg_temp
FROM conditions_smallint c
GROUP BY 1,2 WITH NO DATA;
CALL refresh_continuous_aggregate('cond_20_smallint', 0::smallint, 70::smallint);
SELECT * FROM cond_20_smallint
ORDER BY 1,2;
bucket | device | avg_temp
--------+--------+------------------
0 | 0 | 6
0 | 1 | 19
0 | 2 | 14.5
0 | 3 | 21.4
20 | 0 | 15
20 | 1 | 16
20 | 2 | 23.3333333333333
20 | 3 | 13.6666666666667
40 | 0 | 21
40 | 1 | 19.4
40 | 2 | 22
40 | 3 | 21.4
(12 rows)
-- Try max refresh window size
CALL refresh_continuous_aggregate('cond_20_smallint', NULL, NULL);
-- Test int-based continuous aggregate
CREATE TABLE conditions_int (time int NOT NULL, device int, temp float);
SELECT create_hypertable('conditions_int', 'time', chunk_time_interval => 20);
create_hypertable
-----------------------------
(7,public,conditions_int,t)
(1 row)
INSERT INTO conditions_int
SELECT t, ceil(abs(timestamp_hash(to_timestamp(t)::timestamp))%4)::int, abs(timestamp_hash(to_timestamp(t)::timestamp))%40
FROM generate_series(1, 100, 1) t;
CREATE OR REPLACE FUNCTION int_now()
RETURNS int LANGUAGE SQL STABLE AS
$$
SELECT coalesce(max(time), 0)
FROM conditions_int
$$;
SELECT set_integer_now_func('conditions_int', 'int_now');
set_integer_now_func
----------------------
(1 row)
CREATE MATERIALIZED VIEW cond_20_int
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket(INT '20', time) AS bucket, device, avg(temp) AS avg_temp
FROM conditions_int
GROUP BY 1,2 WITH NO DATA;
CALL refresh_continuous_aggregate('cond_20_int', 0, 65);
SELECT * FROM cond_20_int
ORDER BY 1,2;
bucket | device | avg_temp
--------+--------+------------------
0 | 0 | 6
0 | 1 | 19
0 | 2 | 14.5
0 | 3 | 21.4
20 | 0 | 15
20 | 1 | 16
20 | 2 | 23.3333333333333
20 | 3 | 13.6666666666667
40 | 0 | 21
40 | 1 | 19.4
40 | 2 | 22
40 | 3 | 21.4
(12 rows)
-- Try max refresh window size
CALL refresh_continuous_aggregate('cond_20_int', NULL, NULL);
-- Test bigint-based continuous aggregate
CREATE TABLE conditions_bigint (time bigint NOT NULL, device int, temp float);
SELECT create_hypertable('conditions_bigint', 'time', chunk_time_interval => 20);
create_hypertable
--------------------------------
(9,public,conditions_bigint,t)
(1 row)
INSERT INTO conditions_bigint
SELECT t, ceil(abs(timestamp_hash(to_timestamp(t)::timestamp))%4)::bigint, abs(timestamp_hash(to_timestamp(t)::timestamp))%40
FROM generate_series(1, 100, 1) t;
CREATE OR REPLACE FUNCTION bigint_now()
RETURNS bigint LANGUAGE SQL STABLE AS
$$
SELECT coalesce(max(time), 0)::bigint
FROM conditions_bigint
$$;
SELECT set_integer_now_func('conditions_bigint', 'bigint_now');
set_integer_now_func
----------------------
(1 row)
CREATE MATERIALIZED VIEW cond_20_bigint
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket(BIGINT '20', time) AS bucket, device, avg(temp) AS avg_temp
FROM conditions_bigint
GROUP BY 1,2 WITH NO DATA;
CALL refresh_continuous_aggregate('cond_20_bigint', 0, 75);
SELECT * FROM cond_20_bigint
ORDER BY 1,2;
bucket | device | avg_temp
--------+--------+------------------
0 | 0 | 6
0 | 1 | 19
0 | 2 | 14.5
0 | 3 | 21.4
20 | 0 | 15
20 | 1 | 16
20 | 2 | 23.3333333333333
20 | 3 | 13.6666666666667
40 | 0 | 21
40 | 1 | 19.4
40 | 2 | 22
40 | 3 | 21.4
(12 rows)
-- Try max refresh window size
CALL refresh_continuous_aggregate('cond_20_bigint', NULL, NULL);
-- Test that WITH NO DATA and WITH DATA work (we use whatever is the
-- default for Postgres, so we do not need a separate test for the
-- default).
CREATE MATERIALIZED VIEW weekly_temp_without_data
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('7 days', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH NO DATA;
CREATE MATERIALIZED VIEW weekly_temp_with_data
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('7 days', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH DATA;
psql:include/cagg_refresh_common.sql:255: NOTICE: refreshing continuous aggregate "weekly_temp_with_data"
SELECT * FROM weekly_temp_without_data;
day | device | avg_temp
-----+--------+----------
(0 rows)
SELECT * FROM weekly_temp_with_data ORDER BY 1,2;
day | device | avg_temp
------------------------------+--------+------------------
Sun Apr 26 17:00:00 2020 PDT | 0 | 17.8181818181818
Sun Apr 26 17:00:00 2020 PDT | 1 | 17.2474226804124
Sun Apr 26 17:00:00 2020 PDT | 2 | 18.9803921568627
Sun Apr 26 17:00:00 2020 PDT | 3 | 21.5631067961165
Sun May 03 17:00:00 2020 PDT | 0 | 16.7659574468085
Sun May 03 17:00:00 2020 PDT | 1 | 22.7272727272727
Sun May 03 17:00:00 2020 PDT | 2 | 15.811320754717
Sun May 03 17:00:00 2020 PDT | 3 | 19
(8 rows)
\set ON_ERROR_STOP 0
-- REFRESH MATERIALIZED VIEW is blocked on continuous aggregates
REFRESH MATERIALIZED VIEW weekly_temp_without_data;
psql:include/cagg_refresh_common.sql:262: ERROR: operation not supported on continuous aggregate
-- These should fail since we do not allow refreshing inside a
-- transaction, not even as part of CREATE MATERIALIZED VIEW.
DO LANGUAGE PLPGSQL $$ BEGIN
CREATE MATERIALIZED VIEW weekly_conditions
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('7 days', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH DATA;
END $$;
psql:include/cagg_refresh_common.sql:274: ERROR: CREATE MATERIALIZED VIEW ... WITH DATA cannot be executed from a function
BEGIN;
CREATE MATERIALIZED VIEW weekly_conditions
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('7 days', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH DATA;
psql:include/cagg_refresh_common.sql:283: ERROR: CREATE MATERIALIZED VIEW ... WITH DATA cannot run inside a transaction block
COMMIT;
\set ON_ERROR_STOP 1
-- This should not fail since we do not refresh the continuous
-- aggregate.
DO LANGUAGE PLPGSQL $$ BEGIN
CREATE MATERIALIZED VIEW weekly_conditions_1
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('7 days', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH NO DATA;
END $$;
BEGIN;
CREATE MATERIALIZED VIEW weekly_conditions_2
WITH (timescaledb.continuous,
timescaledb.materialized_only=true)
AS
SELECT time_bucket('7 days', time) AS day, device, avg(temp) AS avg_temp
FROM conditions
GROUP BY 1,2 WITH NO DATA;
COMMIT;
-- Additional tests for MERGE refresh
DROP TABLE conditions CASCADE;
NOTICE: drop cascades to 10 other objects
NOTICE: drop cascades to table _timescaledb_internal._hyper_2_2_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_12_24_chunk
CREATE TABLE conditions (
time TIMESTAMPTZ NOT NULL,
location TEXT NOT NULL,
temperature DOUBLE PRECISION,
humidity DOUBLE PRECISION
);
SELECT FROM create_hypertable( 'conditions', 'time');
--
(1 row)
INSERT INTO conditions
VALUES
('2018-01-01 09:20:00-08', 'SFO', 55, 45),
('2018-01-02 09:30:00-08', 'POR', 100, 100),
('2018-01-02 09:20:00-08', 'SFO', 65, 45),
('2018-01-02 09:10:00-08', 'NYC', 65, 45),
('2018-11-01 09:20:00-08', 'NYC', 45, 30),
('2018-11-01 10:40:00-08', 'NYC', 55, 35),
('2018-11-01 11:50:00-08', 'NYC', 65, 40),
('2018-11-01 12:10:00-08', 'NYC', 75, 45),
('2018-11-01 13:10:00-08', 'NYC', 85, 50),
('2018-11-02 09:20:00-08', 'NYC', 10, 10),
('2018-11-02 10:30:00-08', 'NYC', 20, 15),
('2018-11-02 11:40:00-08', 'NYC', null, null),
('2018-11-03 09:50:00-08', 'NYC', null, null);
CREATE MATERIALIZED VIEW conditions_daily
WITH (timescaledb.continuous) AS
SELECT
time_bucket(INTERVAL '1 day', time) AS bucket,
location,
AVG(temperature),
MAX(temperature),
MIN(temperature)
FROM conditions
GROUP BY bucket, location
WITH NO DATA;
-- First refresh using MERGE should fall back to INSERT
SET client_min_messages TO LOG;
CALL refresh_continuous_aggregate('conditions_daily', NULL, '2018-11-01 23:59:59-08');
LOG: statement: CALL refresh_continuous_aggregate('conditions_daily', NULL, '2018-11-01 23:59:59-08');
LOG: inserted 5 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_16"
SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
LOG: statement: SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
bucket | location | avg | max | min
------------------------------+----------+-----+-----+-----
Sun Dec 31 16:00:00 2017 PST | SFO | 55 | 55 | 55
Mon Jan 01 16:00:00 2018 PST | NYC | 65 | 65 | 65
Mon Jan 01 16:00:00 2018 PST | POR | 100 | 100 | 100
Mon Jan 01 16:00:00 2018 PST | SFO | 65 | 65 | 65
Wed Oct 31 17:00:00 2018 PDT | NYC | 65 | 85 | 45
(5 rows)
-- Second refresh using MERGE should also fall back to INSERT since there's no previously materialized data in the refresh window
CALL refresh_continuous_aggregate('conditions_daily', '2018-11-01', NULL);
LOG: statement: CALL refresh_continuous_aggregate('conditions_daily', '2018-11-01', NULL);
LOG: inserted 2 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_16"
SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
LOG: statement: SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
bucket | location | avg | max | min
------------------------------+----------+-----+-----+-----
Sun Dec 31 16:00:00 2017 PST | SFO | 55 | 55 | 55
Mon Jan 01 16:00:00 2018 PST | NYC | 65 | 65 | 65
Mon Jan 01 16:00:00 2018 PST | POR | 100 | 100 | 100
Mon Jan 01 16:00:00 2018 PST | SFO | 65 | 65 | 65
Wed Oct 31 17:00:00 2018 PDT | NYC | 65 | 85 | 45
Thu Nov 01 17:00:00 2018 PDT | NYC | 15 | 20 | 10
Fri Nov 02 17:00:00 2018 PDT | NYC | | |
(7 rows)
-- All data should be in the materialization hypertable
CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
LOG: statement: CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
NOTICE: continuous aggregate "conditions_daily" is already up-to-date
SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
LOG: statement: SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
bucket | location | avg | max | min
------------------------------+----------+-----+-----+-----
Sun Dec 31 16:00:00 2017 PST | SFO | 55 | 55 | 55
Mon Jan 01 16:00:00 2018 PST | NYC | 65 | 65 | 65
Mon Jan 01 16:00:00 2018 PST | POR | 100 | 100 | 100
Mon Jan 01 16:00:00 2018 PST | SFO | 65 | 65 | 65
Wed Oct 31 17:00:00 2018 PDT | NYC | 65 | 85 | 45
Thu Nov 01 17:00:00 2018 PDT | NYC | 15 | 20 | 10
Fri Nov 02 17:00:00 2018 PDT | NYC | | |
(7 rows)
-- Changing past data in a column (humidity) that is not used by the cagg
UPDATE conditions SET humidity = humidity + 100 WHERE time = '2018-01-02 09:20:00-08' AND location = 'SFO';
LOG: statement: UPDATE conditions SET humidity = humidity + 100 WHERE time = '2018-01-02 09:20:00-08' AND location = 'SFO';
-- Shouldn't affect the materialization hypertable (merged=0 and deleted=0)
CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
LOG: statement: CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
LOG: merged 0 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_16"
LOG: deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_16"
SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
LOG: statement: SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
bucket | location | avg | max | min
------------------------------+----------+-----+-----+-----
Sun Dec 31 16:00:00 2017 PST | SFO | 55 | 55 | 55
Mon Jan 01 16:00:00 2018 PST | NYC | 65 | 65 | 65
Mon Jan 01 16:00:00 2018 PST | POR | 100 | 100 | 100
Mon Jan 01 16:00:00 2018 PST | SFO | 65 | 65 | 65
Wed Oct 31 17:00:00 2018 PDT | NYC | 65 | 85 | 45
Thu Nov 01 17:00:00 2018 PDT | NYC | 15 | 20 | 10
Fri Nov 02 17:00:00 2018 PDT | NYC | | |
(7 rows)
-- Backfill some data in the past
INSERT INTO conditions
VALUES
('2017-01-01 09:20:00-08', 'GRU', 55, 45),
('2017-01-01 09:30:00-08', 'POA', 100, 100),
('2017-01-02 09:20:00-08', 'CNF', 65, 45);
LOG: statement: INSERT INTO conditions
VALUES
('2017-01-01 09:20:00-08', 'GRU', 55, 45),
('2017-01-01 09:30:00-08', 'POA', 100, 100),
('2017-01-02 09:20:00-08', 'CNF', 65, 45);
-- There's no previously materialized data in the affected range, so the refresh should fall back to INSERT
CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
LOG: statement: CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
LOG: inserted 3 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_16"
SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
LOG: statement: SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
bucket | location | avg | max | min
------------------------------+----------+-----+-----+-----
Sat Dec 31 16:00:00 2016 PST | GRU | 55 | 55 | 55
Sat Dec 31 16:00:00 2016 PST | POA | 100 | 100 | 100
Sun Jan 01 16:00:00 2017 PST | CNF | 65 | 65 | 65
Sun Dec 31 16:00:00 2017 PST | SFO | 55 | 55 | 55
Mon Jan 01 16:00:00 2018 PST | NYC | 65 | 65 | 65
Mon Jan 01 16:00:00 2018 PST | POR | 100 | 100 | 100
Mon Jan 01 16:00:00 2018 PST | SFO | 65 | 65 | 65
Wed Oct 31 17:00:00 2018 PDT | NYC | 65 | 85 | 45
Thu Nov 01 17:00:00 2018 PDT | NYC | 15 | 20 | 10
Fri Nov 02 17:00:00 2018 PDT | NYC | | |
(10 rows)
-- Update already materialized data in the past
UPDATE conditions SET temperature = temperature + 100 WHERE time = '2018-11-02 10:30:00-08' AND location = 'NYC';
LOG: statement: UPDATE conditions SET temperature = temperature + 100 WHERE time = '2018-11-02 10:30:00-08' AND location = 'NYC';
-- Should merge 1 bucket (merged=1 and deleted=0)
CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
LOG: statement: CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
LOG: merged 1 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_16"
LOG: deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_16"
SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
LOG: statement: SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
bucket | location | avg | max | min
------------------------------+----------+-----+-----+-----
Sat Dec 31 16:00:00 2016 PST | GRU | 55 | 55 | 55
Sat Dec 31 16:00:00 2016 PST | POA | 100 | 100 | 100
Sun Jan 01 16:00:00 2017 PST | CNF | 65 | 65 | 65
Sun Dec 31 16:00:00 2017 PST | SFO | 55 | 55 | 55
Mon Jan 01 16:00:00 2018 PST | NYC | 65 | 65 | 65
Mon Jan 01 16:00:00 2018 PST | POR | 100 | 100 | 100
Mon Jan 01 16:00:00 2018 PST | SFO | 65 | 65 | 65
Wed Oct 31 17:00:00 2018 PDT | NYC | 65 | 85 | 45
Thu Nov 01 17:00:00 2018 PDT | NYC | 65 | 120 | 10
Fri Nov 02 17:00:00 2018 PDT | NYC | | |
(10 rows)
-- Delete one entire bucket
DELETE FROM conditions WHERE time >= '2018-11-02' AND time < '2018-11-03' AND location = 'NYC';
LOG: statement: DELETE FROM conditions WHERE time >= '2018-11-02' AND time < '2018-11-03' AND location = 'NYC';
-- Should not merge any bucket but delete one bucket (merged=0 and deleted=1)
CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
LOG: statement: CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
LOG: merged 0 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_16"
LOG: deleted 1 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_16"
SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
LOG: statement: SELECT * FROM conditions_daily ORDER BY 1, 2, 3 NULLS LAST, 4 NULLS LAST, 5 NULLS LAST;
bucket | location | avg | max | min
------------------------------+----------+-----+-----+-----
Sat Dec 31 16:00:00 2016 PST | GRU | 55 | 55 | 55
Sat Dec 31 16:00:00 2016 PST | POA | 100 | 100 | 100
Sun Jan 01 16:00:00 2017 PST | CNF | 65 | 65 | 65
Sun Dec 31 16:00:00 2017 PST | SFO | 55 | 55 | 55
Mon Jan 01 16:00:00 2018 PST | NYC | 65 | 65 | 65
Mon Jan 01 16:00:00 2018 PST | POR | 100 | 100 | 100
Mon Jan 01 16:00:00 2018 PST | SFO | 65 | 65 | 65
Wed Oct 31 17:00:00 2018 PDT | NYC | 65 | 85 | 45
Fri Nov 02 17:00:00 2018 PDT | NYC | | |
(9 rows)