mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-23 06:22:03 +08:00
PostgreSQL 11 added support for query plans that do partitionwise aggregation on partitioned tables. Such query plans push down aggregates to individual partitions (either fully or partially) for similar or better performance than regular plans due to, among other things, improved locking. The changes in this commit add the corresponding partitionwise aggregation functionality for hypertables. To enable this functionality on hypertables, we add partitioning metadata at the planning stage to make the regular PostgreSQL planner believe it is planning a partitioned table. Alternatively, we could have added the corresponding planner paths in our own code, e.g., in the create_upper_paths_hook, but this would require copying or re-implementing a large amount of PostgreSQL planning code. Note that partitionwise aggregation will only work with PostgreSQL 11. As a side effect of making hypertables look like partitioned tables during planning, some append plans will differ because the planner removes any Result projection nodes from such plans, knowing it can push projections down to the partitions instead. This also affects a number of query-related tests, so these have been split into version-specific tests.
271 lines
7.6 KiB
PL/PgSQL
271 lines
7.6 KiB
PL/PgSQL
-- This file and its contents are licensed under the Apache License 2.0.
|
|
-- Please see the included NOTICE for copyright information and
|
|
-- LICENSE-APACHE for a copy of the license.
|
|
|
|
-- Hypertable with two dimensions: "time" plus "device" (2 partitions)
CREATE TABLE hyper (
    time   timestamptz,
    device int,
    temp   float
);

SELECT *
FROM create_hypertable('hyper', 'time', 'device', 2);
|
|
|
|
-- Plain PostgreSQL counterpart of the hypertable: hash-partitioned on
-- "device" (modulus 2), with each hash partition sub-partitioned by range
-- on "time".
CREATE TABLE pg2dim (
    time   timestamptz,
    device int,
    temp   float
) PARTITION BY HASH (device);

-- First level: the two hash partitions
CREATE TABLE pg2dim_h1
    PARTITION OF pg2dim
    FOR VALUES WITH (MODULUS 2, REMAINDER 0)
    PARTITION BY RANGE (time);

CREATE TABLE pg2dim_h2
    PARTITION OF pg2dim
    FOR VALUES WITH (MODULUS 2, REMAINDER 1)
    PARTITION BY RANGE (time);

-- Second level: two time ranges under each hash partition
CREATE TABLE pg2dim_h1_t1
    PARTITION OF pg2dim_h1
    FOR VALUES FROM ('2018-01-01 00:00') TO ('2018-09-01 00:00');

CREATE TABLE pg2dim_h1_t2
    PARTITION OF pg2dim_h1
    FOR VALUES FROM ('2018-09-01 00:00') TO ('2018-12-01 00:00');

CREATE TABLE pg2dim_h2_t1
    PARTITION OF pg2dim_h2
    FOR VALUES FROM ('2018-01-01 00:00') TO ('2018-09-01 00:00');

CREATE TABLE pg2dim_h2_t2
    PARTITION OF pg2dim_h2
    FOR VALUES FROM ('2018-09-01 00:00') TO ('2018-12-01 00:00');
|
|
|
|
-- One-dimensional reference table: hash-partitioned on "device" only
CREATE TABLE pg1dim (
    time   timestamptz,
    device int,
    temp   float
) PARTITION BY HASH (device);

CREATE TABLE pg1dim_h1
    PARTITION OF pg1dim
    FOR VALUES WITH (MODULUS 2, REMAINDER 0);

CREATE TABLE pg1dim_h2
    PARTITION OF pg1dim
    FOR VALUES WITH (MODULUS 2, REMAINDER 1);
|
|
|
|
-- Load the same four rows into the hypertable and both reference tables so
-- that their plans can be compared. Two devices (1 and 3), each with one
-- reading in February 2018 and one in October 2018. Column lists are spelled
-- out so the inserts do not depend on the physical column order.
INSERT INTO hyper (time, device, temp) VALUES
    ('2018-02-19 13:01', 1, 2.3),
    ('2018-02-19 13:02', 3, 3.1),
    ('2018-10-19 13:01', 1, 7.6),
    ('2018-10-19 13:02', 3, 9.0);

INSERT INTO pg2dim (time, device, temp) VALUES
    ('2018-02-19 13:01', 1, 2.3),
    ('2018-02-19 13:02', 3, 3.1),
    ('2018-10-19 13:01', 1, 7.6),
    ('2018-10-19 13:02', 3, 9.0);

INSERT INTO pg1dim (time, device, temp) VALUES
    ('2018-02-19 13:01', 1, 2.3),
    ('2018-02-19 13:02', 3, 3.1),
    ('2018-10-19 13:01', 1, 7.6),
    ('2018-10-19 13:02', 3, 9.0);
|
|
|
|
-- List the chunks (sub-tables) created for the hypertable
SELECT * FROM test.show_subtables('hyper');

-- Show which rows landed in each leaf partition of pg2dim. An explicit
-- ORDER BY keeps the recorded output independent of physical row order.
SELECT time, device, temp FROM pg2dim_h1_t1 ORDER BY time, device;
SELECT time, device, temp FROM pg2dim_h1_t2 ORDER BY time, device;
SELECT time, device, temp FROM pg2dim_h2_t1 ORDER BY time, device;
SELECT time, device, temp FROM pg2dim_h2_t2 ORDER BY time, device;
|
|
|
|
|
|
-- Compare plans with partitionwise aggregate disabled and enabled. The
-- plain PostgreSQL partitioned tables are run first, as a reference.

-- pg1dim: GROUP BY covers all partition keys
SET enable_partitionwise_aggregate TO 'off';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    device,
    avg(temp)
FROM pg1dim
GROUP BY device
ORDER BY device;

SET enable_partitionwise_aggregate TO 'on';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    device,
    avg(temp)
FROM pg1dim
GROUP BY device
ORDER BY device;
|
|
|
|
-- pg2dim: GROUP BY does not cover all partition keys ("time" is left
-- out), so only partial partitionwise aggregation applies
SET enable_partitionwise_aggregate TO 'off';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    device,
    avg(temp)
FROM pg2dim
GROUP BY device
ORDER BY device;

SET enable_partitionwise_aggregate TO 'on';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    device,
    avg(temp)
FROM pg2dim
GROUP BY device
ORDER BY device;
|
|
|
|
-- pg2dim: GROUP BY covers all partition keys (full partitionwise)
SET enable_partitionwise_aggregate TO 'off';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    time,
    device,
    avg(temp)
FROM pg2dim
GROUP BY time, device
ORDER BY time, device;

SET enable_partitionwise_aggregate TO 'on';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    time,
    device,
    avg(temp)
FROM pg2dim
GROUP BY time, device
ORDER BY time, device;
|
|
|
|
-- pg2dim: grouping on a date_trunc expression over "time" rather than on
-- the column itself means the partition keys are not all covered
-- (partial partitionwise)
SET enable_partitionwise_aggregate TO 'off';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    date_trunc('month', time),
    device,
    avg(temp)
FROM pg2dim
GROUP BY date_trunc('month', time), device
ORDER BY date_trunc('month', time), device;

SET enable_partitionwise_aggregate TO 'on';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    date_trunc('month', time),
    device,
    avg(temp)
FROM pg2dim
GROUP BY date_trunc('month', time), device
ORDER BY date_trunc('month', time), device;
|
|
|
|
-- Repeat the comparison on the hypertable.

-- hyper: GROUP BY does not cover all partition keys (partial partitionwise)
SET enable_partitionwise_aggregate TO 'off';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    device,
    avg(temp)
FROM hyper
GROUP BY device
ORDER BY device;

SET enable_partitionwise_aggregate TO 'on';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    device,
    avg(temp)
FROM hyper
GROUP BY device
ORDER BY device;
|
|
|
|
-- hyper: GROUP BY covers all partition keys (full partitionwise)
SET enable_partitionwise_aggregate TO 'off';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    time,
    device,
    avg(temp)
FROM hyper
GROUP BY time, device
ORDER BY time, device;

SET enable_partitionwise_aggregate TO 'on';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    time,
    device,
    avg(temp)
FROM hyper
GROUP BY time, device
ORDER BY time, device;
|
|
|
|
-- hyper: date_trunc(time) is not a partition key, so aggregation is
-- only partial
SET enable_partitionwise_aggregate TO 'off';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    date_trunc('month', time),
    device,
    avg(temp)
FROM hyper
GROUP BY date_trunc('month', time), device
ORDER BY date_trunc('month', time), device;

SET enable_partitionwise_aggregate TO 'on';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    date_trunc('month', time),
    device,
    avg(temp)
FROM hyper
GROUP BY date_trunc('month', time), device
ORDER BY date_trunc('month', time), device;
|
|
|
|
-- hyper: same comparison, grouping on time_bucket instead of date_trunc
SET enable_partitionwise_aggregate TO 'off';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    time_bucket('1 month', time),
    device,
    avg(temp)
FROM hyper
GROUP BY time_bucket('1 month', time), device
ORDER BY time_bucket('1 month', time), device;

SET enable_partitionwise_aggregate TO 'on';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    time_bucket('1 month', time),
    device,
    avg(temp)
FROM hyper
GROUP BY time_bucket('1 month', time), device
ORDER BY time_bucket('1 month', time), device;
|
|
|
|
-- Test partitionwise joins, mostly to see that we do not break anything.
-- A second hypertable with the same dimensions as "hyper" serves as the
-- join partner.
CREATE TABLE hyper_meta (time timestamptz, device int, info text);
SELECT * FROM create_hypertable('hyper_meta', 'time', 'device', 2);

INSERT INTO hyper_meta (time, device, info) VALUES
    ('2018-02-19 13:01', 1, 'device_1'),
    ('2018-02-19 13:02', 3, 'device_3');

-- COSTS OFF keeps cost estimates out of the plan output, so the recorded
-- result does not depend on table statistics; it also matches the other
-- EXPLAIN calls in this file.
SET enable_partitionwise_join = 'off';

EXPLAIN (COSTS OFF)
SELECT h.time, h.device, h.temp, hm.info
FROM hyper AS h
INNER JOIN hyper_meta AS hm
    ON h.device = hm.device;

EXPLAIN (COSTS OFF)
SELECT pg2.time, pg2.device, pg2.temp, pg1.temp
FROM pg2dim AS pg2
INNER JOIN pg1dim AS pg1
    ON pg2.device = pg1.device;

SET enable_partitionwise_join = 'on';

EXPLAIN (COSTS OFF)
SELECT h.time, h.device, h.temp, hm.info
FROM hyper AS h
INNER JOIN hyper_meta AS hm
    ON h.device = hm.device;

EXPLAIN (COSTS OFF)
SELECT pg2.time, pg2.device, pg2.temp, pg1.temp
FROM pg2dim AS pg2
INNER JOIN pg1dim AS pg1
    ON pg2.device = pg1.device;
|
|
|
|
-- Test hypertable with time partitioning function
|
|
-- Time partitioning function: converts a float8 value to timestamptz by
-- passing it to to_timestamp(). Declared IMMUTABLE and written in PL/pgSQL.
CREATE OR REPLACE FUNCTION time_func(unixtime float8)
    RETURNS timestamptz
    LANGUAGE plpgsql
    IMMUTABLE
AS $BODY$
BEGIN
    RETURN to_timestamp(unixtime);
END
$BODY$;
|
|
|
|
-- Hypertable whose float8 "time" column is partitioned through time_func
CREATE TABLE hyper_timepart (time float8, device int, temp float);
SELECT * FROM create_hypertable('hyper_timepart', 'time', 'device', 2, time_partitioning_func => 'time_func');

-- The planner won't pick push-down aggregates on a table with a time
-- partitioning function unless the table holds a certain amount of data,
-- so load 5000 rows. The seed is fixed so that random() yields the same
-- values on every run. The column list is explicit so the insert does not
-- depend on the physical column order.
SELECT setseed(1);

INSERT INTO hyper_timepart (time, device, temp)
SELECT x, ceil(random() * 8), random() * 20
FROM generate_series(0, 5000 - 1) AS x;
|
|
|
|
-- hyper_timepart: all partition keys covered (full partitionwise).
-- Reference plans with partitionwise aggregate turned off.
SET enable_partitionwise_aggregate TO 'off';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    time,
    device,
    avg(temp)
FROM hyper_timepart
GROUP BY time, device
ORDER BY time, device
LIMIT 10;

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    time_func(time),
    device,
    avg(temp)
FROM hyper_timepart
GROUP BY time_func(time), device
ORDER BY time_func(time), device
LIMIT 10;
|
|
|
|
-- With partitionwise aggregate on, grouping on the original time column
-- should be pushed down
SET enable_partitionwise_aggregate TO 'on';

EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    time,
    device,
    avg(temp)
FROM hyper_timepart
GROUP BY time, device
ORDER BY time, device
LIMIT 10;

-- Grouping on the time partitioning function applied to the open
-- dimension should allow push-down as well
EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    time_func(time),
    device,
    avg(temp)
FROM hyper_timepart
GROUP BY time_func(time), device
ORDER BY time_func(time), device
LIMIT 10;

-- The same should hold when the partitioning function is applied to the
-- closed dimension
EXPLAIN (VERBOSE, COSTS OFF)
SELECT
    time_func(time),
    _timescaledb_internal.get_partition_hash(device),
    avg(temp)
FROM hyper_timepart
GROUP BY time_func(time), _timescaledb_internal.get_partition_hash(device)
ORDER BY time_func(time), _timescaledb_internal.get_partition_hash(device)
LIMIT 10;
|