Compute basic data node rel stats from chunks

Planning of a data node rel during a distributed query should use the
accumulated stats from the individual chunks that the data node rel
represents. Since a data node rel is not a real base rel (i.e., it
doesn't correspond to an actual relation), it has no statistics in
`pg_catalog` that can be used for planning. Thus, functions such as
`set_baserel_size_estimates` return strange estimates for data node
rels when the planner believes the rel has stats (e.g., after an
`ANALYZE`).

This change fixes the issue by not relying on the planner to compute
rel estimates for data node rels. Instead, the estimates accumulated
from the chunks queried by the data node rel are used directly. This
also obviates the need to compute these stats again.
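
For illustration, here is a minimal standalone sketch of the flow. It
uses stub types only; the real code operates on PostgreSQL's
`RelOptInfo` and TimescaleDB's `DataNodeChunkAssignment`, as the diffs
below show:

```c
#include <stdio.h>

/* Hypothetical stub stand-ins for the planner structs; the real code
 * uses RelOptInfo (per chunk) and DataNodeChunkAssignment (per data
 * node), as in the diffs below. */
typedef struct ChunkRel
{
	unsigned pages;  /* heap pages (cf. RelOptInfo->pages) */
	double rows;     /* estimated rows returned */
	double tuples;   /* raw tuple count */
} ChunkRel;

typedef struct Assignment
{
	unsigned pages;
	double rows;
	double tuples;
} Assignment;

/* Accumulate one chunk's basic stats onto its data node assignment,
 * mirroring data_node_chunk_assignment_assign_chunk(). */
static void
assign_chunk(Assignment *sca, const ChunkRel *chunkrel)
{
	sca->pages += chunkrel->pages;
	sca->rows += chunkrel->rows;
	sca->tuples += chunkrel->tuples;
}

int
main(void)
{
	ChunkRel chunks[] = { { 10, 100.0, 120.0 }, { 20, 200.0, 240.0 } };
	Assignment sca = { 0, 0.0, 0.0 };

	for (int i = 0; i < 2; i++)
		assign_chunk(&sca, &chunks[i]);

	/* The data node rel is then seeded directly from these sums, and
	 * set_baserel_size_estimates() is skipped for it. */
	printf("pages=%u rows=%.0f tuples=%.0f\n",
		   sca.pages, sca.rows, sca.tuples);
	return 0;
}
```

Each data node rel thus ends up with exactly the summed stats of the
chunks assigned to it, so the planner never has to guess.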

Given the new size estimates that this change enables, some plan/test
outputs have changed, and the tests have been updated accordingly.
Author:    Erik Nordström
Committer: Erik Nordström
Date:      2019-10-28 10:13:59 +01:00
Parent:    0050810803
Commit:    af0a75f8fe

11 changed files with 192 additions and 172 deletions


@@ -425,11 +425,7 @@ tsl_debug_append_path(StringInfo buf, PlannerInfo *root, Path *path, int indent)
appendStringInfoString(buf, ")");
}
appendStringInfo(buf,
" rows=%.0f cost=%.2f..%.2f",
path->rows,
path->startup_cost,
path->total_cost);
appendStringInfo(buf, " rows=%.0f", path->rows);
if (path->pathkeys)
appendStringInfoString(buf, " has pathkeys");


@@ -93,6 +93,7 @@ data_node_chunk_assignment_assign_chunk(DataNodeChunkAssignments *scas, RelOptIn
sca->remote_chunk_ids =
lappend_int(sca->remote_chunk_ids,
get_remote_chunk_id_from_relid(chunkrel->serverid, rte->relid));
sca->pages += chunkrel->pages;
sca->rows += chunkrel->rows;
sca->tuples += chunkrel->tuples;


@@ -17,6 +17,7 @@
#include <nodes/relation.h>
#endif
#include <utils/hsearch.h>
#include <storage/block.h>
/*
* data node-chunk assignments map chunks to the data nodes that will be responsible
@@ -29,6 +30,7 @@
typedef struct DataNodeChunkAssignment
{
Oid node_server_oid;
BlockNumber pages;
double rows;
double tuples;
Cost startup_cost;


@@ -435,11 +435,15 @@ data_node_scan_add_node_paths(PlannerInfo *root, RelOptInfo *hyper_rel)
data_node_chunk_assignment_get_or_create(&scas, data_node_rel);
TsFdwRelInfo *fpinfo;
/* Update the number of tuples and rows based on the chunk
* assignments */
/* Basic stats for data node rels come from the assigned chunks since
* data node rels don't correspond to real tables in the system */
data_node_rel->pages = sca->pages;
data_node_rel->tuples = sca->tuples;
data_node_rel->rows = sca->rows;
/* Should also have the same width as any queried chunk */
data_node_rel->reltarget->width = hyper_rel->part_rels[0]->reltarget->width;
fpinfo = fdw_relinfo_create(root,
data_node_rel,
data_node_rel->serverid,


@@ -239,8 +239,14 @@ fdw_relinfo_create(PlannerInfo *root, RelOptInfo *rel, Oid server_oid, Oid local
rel->pages = 10;
rel->tuples = (10 * BLCKSZ) / (rel->reltarget->width + MAXALIGN(SizeofHeapTupleHeader));
}
/* Estimate rel size as best we can with local statistics. */
set_baserel_size_estimates(root, rel);
/* Estimate rel size as best we can with local statistics. There are
* no local statistics for data node rels since they aren't real base
* rels (there's no corresponding table in the system to associate
* stats with). Instead, data node rels already have basic stats set
* at creation time based on data-node-chunk assignment. */
if (fpinfo->type != TS_FDW_RELINFO_HYPERTABLE_DATA_NODE)
set_baserel_size_estimates(root, rel);
/* Fill in basically-bogus cost estimates for use later. */
fdw_estimate_path_cost_size(root,


@@ -61,6 +61,7 @@ INSERT INTO hyper VALUES
('2018-05-19 13:01', 4, 4, 5.1),
('2018-05-20 15:08', 5, 1, 9.4),
('2018-05-30 13:02', 3, 2, 9.0);
ANALYZE hyper;
-- Optimizer debug messages shown at debug level 2
SET client_min_messages TO DEBUG2;
-- Turning on show_rel should show a message
@@ -78,16 +79,16 @@ GROUP BY 1, 2
HAVING avg(temp) > 4
ORDER BY 1, 2;
DEBUG: In tsl_set_rel_pathlist:
RELOPTINFO [BASEREL] (names: hyper): rows=36 width=20
RELOPTINFO [BASEREL] (names: hyper): rows=5 width=20
path list:
Append [parents: hyper] rows=37 cost=100.00..412.58
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..128.09
CustomScan (DataNodeScanPath) [parents: hyper] rows=19 cost=100.00..156.21
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..128.09
MergeAppend [parents: hyper] rows=37 cost=300.02..418.48 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..129.49 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=19 cost=100.00..159.00 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..129.49 has pathkeys
Append [parents: hyper] rows=5
CustomScan (DataNodeScanPath) [parents: hyper] rows=1 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=3 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=1 has pathkeys
MergeAppend [parents: hyper] rows=5 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=1 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=3 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=1 has pathkeys
time | device | temp
@@ -115,32 +116,22 @@ GROUP BY 1, 2
HAVING avg(temp) > 4
ORDER BY 1, 2;
DEBUG: Stage GROUP_AGG in get_foreign_upper_paths:
RELOPTINFO [OTHER_UPPER_REL] (names: hyper): rows=9 width=20
RELOPTINFO [OTHER_UPPER_REL] (names: hyper): rows=1 width=20
path list:
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.09..128.32
Agg [parents: hyper] rows=3 cost=128.18..128.34
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..128.09
Agg [parents: hyper] rows=3 cost=100.00..129.74 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..129.49 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=1 has pathkeys
DEBUG: Stage GROUP_AGG in get_foreign_upper_paths:
RELOPTINFO [OTHER_UPPER_REL] (names: hyper): rows=19 width=20
RELOPTINFO [OTHER_UPPER_REL] (names: hyper): rows=3 width=20
path list:
CustomScan (DataNodeScanPath) [parents: hyper] rows=19 cost=100.19..156.69
Agg [parents: hyper] rows=6 cost=156.61..157.18 has pathkeys
Sort [parents: hyper] rows=19 cost=156.61..156.66 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=19 cost=100.00..156.21
Agg [parents: hyper] rows=1 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=3 has pathkeys
DEBUG: Stage GROUP_AGG in get_foreign_upper_paths:
RELOPTINFO [OTHER_UPPER_REL] (names: hyper): rows=9 width=20
RELOPTINFO [OTHER_UPPER_REL] (names: hyper): rows=1 width=20
path list:
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.09..128.32
Agg [parents: hyper] rows=3 cost=128.18..128.34
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..128.09
Agg [parents: hyper] rows=3 cost=100.00..129.74 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..129.49 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=1 has pathkeys
time | device | temp


@@ -119,6 +119,8 @@ ORDER BY 1, 2, 3;
2 | 4 | 1717986916 | 9223372036854775807
(13 rows)
ANALYZE hyper;
ANALYZE hyper_repart;
-- Run the EXPLAINs
SET enable_partitionwise_aggregate = ON;
\ir :TEST_QUERY_NAME
@@ -132,33 +134,34 @@ FROM :TABLE_NAME
WHERE time BETWEEN '2019-01-01' AND '2019-01-03'
GROUP BY 1,2
ORDER BY 1,2;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
GroupAggregate
Output: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device, avg(hyper_repart.temp)
Group Key: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device
-> Sort
Output: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device, hyper_repart.temp
Sort Key: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device
-> Custom Scan (AsyncAppend)
Output: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device, hyper_repart.temp
-> Append
-> Custom Scan (DataNodeScan) on public.hyper_repart hyper_repart_1
Output: time_bucket('@ 2 days'::interval, hyper_repart_1."time"), hyper_repart_1.device, hyper_repart_1.temp
Data node: data_node_1
Chunks: _hyper_2_17_dist_chunk, _hyper_2_16_dist_chunk, _hyper_2_22_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[7, 6, 8]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-03 00:00:00-08'::timestamp with time zone))
-> Custom Scan (DataNodeScan) on public.hyper_repart hyper_repart_2
Output: time_bucket('@ 2 days'::interval, hyper_repart_2."time"), hyper_repart_2.device, hyper_repart_2.temp
Data node: data_node_2
Chunks: _hyper_2_18_dist_chunk, _hyper_2_19_dist_chunk, _hyper_2_23_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[6, 7, 8]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-03 00:00:00-08'::timestamp with time zone))
-> Custom Scan (DataNodeScan) on public.hyper_repart hyper_repart_3
Output: time_bucket('@ 2 days'::interval, hyper_repart_3."time"), hyper_repart_3.device, hyper_repart_3.temp
Data node: data_node_3
Chunks: _hyper_2_21_dist_chunk, _hyper_2_20_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[7, 6]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-03 00:00:00-08'::timestamp with time zone))
(24 rows)
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate
Output: (time_bucket('@ 2 days'::interval, "time")), device, avg(temp)
Group Key: (time_bucket('@ 2 days'::interval, "time")), device
-> Custom Scan (AsyncAppend)
Output: (time_bucket('@ 2 days'::interval, "time")), device, (PARTIAL avg(temp))
-> Merge Append
Sort Key: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device
-> Custom Scan (DataNodeScan)
Output: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device, (PARTIAL avg(hyper_repart.temp))
Relations: Aggregate on (public.hyper_repart)
Data node: data_node_1
Chunks: _hyper_2_17_dist_chunk, _hyper_2_16_dist_chunk, _hyper_2_22_dist_chunk
Remote SQL: SELECT public.time_bucket('@ 2 days'::interval, "time"), device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[7, 6, 8]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-03 00:00:00-08'::timestamp with time zone)) GROUP BY 1, 2 ORDER BY public.time_bucket('2 days'::interval, "time") ASC NULLS LAST, device ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: (time_bucket('@ 2 days'::interval, hyper_repart_1."time")), hyper_repart_1.device, (PARTIAL avg(hyper_repart_1.temp))
Relations: Aggregate on (public.hyper_repart)
Data node: data_node_2
Chunks: _hyper_2_18_dist_chunk, _hyper_2_19_dist_chunk, _hyper_2_23_dist_chunk
Remote SQL: SELECT public.time_bucket('@ 2 days'::interval, "time"), device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[6, 7, 8]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-03 00:00:00-08'::timestamp with time zone)) GROUP BY 1, 2 ORDER BY public.time_bucket('2 days'::interval, "time") ASC NULLS LAST, device ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: (time_bucket('@ 2 days'::interval, hyper_repart_2."time")), hyper_repart_2.device, (PARTIAL avg(hyper_repart_2.temp))
Relations: Aggregate on (public.hyper_repart)
Data node: data_node_3
Chunks: _hyper_2_21_dist_chunk, _hyper_2_20_dist_chunk
Remote SQL: SELECT public.time_bucket('@ 2 days'::interval, "time"), device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[7, 6]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-03 00:00:00-08'::timestamp with time zone)) GROUP BY 1, 2 ORDER BY public.time_bucket('2 days'::interval, "time") ASC NULLS LAST, device ASC NULLS LAST
(25 rows)
-- Query doesn't cover repartitioning boundary, so safe to push down
-- bucketing
@@ -186,13 +189,15 @@ ORDER BY 1,2;
Data node: data_node_2
Chunks: _hyper_2_18_dist_chunk
Remote SQL: SELECT public.time_bucket('@ 2 days'::interval, "time"), device, avg(temp) FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[6]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-01 15:00:00-08'::timestamp with time zone)) GROUP BY 1, 2 ORDER BY public.time_bucket('2 days'::interval, "time") ASC NULLS LAST, device ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: (time_bucket('@ 2 days'::interval, hyper_repart_2."time")), hyper_repart_2.device, (avg(hyper_repart_2.temp))
Relations: Aggregate on (public.hyper_repart)
Data node: data_node_3
Chunks: _hyper_2_21_dist_chunk
Remote SQL: SELECT public.time_bucket('@ 2 days'::interval, "time"), device, avg(temp) FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[7]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-01 15:00:00-08'::timestamp with time zone)) GROUP BY 1, 2 ORDER BY public.time_bucket('2 days'::interval, "time") ASC NULLS LAST, device ASC NULLS LAST
(22 rows)
-> GroupAggregate
Output: (time_bucket('@ 2 days'::interval, hyper_repart_2."time")), hyper_repart_2.device, avg(hyper_repart_2.temp)
Group Key: time_bucket('@ 2 days'::interval, hyper_repart_2."time"), hyper_repart_2.device
-> Custom Scan (DataNodeScan) on public.hyper_repart hyper_repart_2
Output: time_bucket('@ 2 days'::interval, hyper_repart_2."time"), hyper_repart_2.device, hyper_repart_2.temp
Data node: data_node_3
Chunks: _hyper_2_21_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[7]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-01 15:00:00-08'::timestamp with time zone)) ORDER BY public.time_bucket('2 days'::interval, "time") ASC NULLS LAST, device ASC NULLS LAST
(24 rows)
-- Run the queries for each setting. Each setting's result is
-- generated into its own file


@@ -2012,14 +2012,15 @@ WHERE time < '2018-06-01 00:00'
GROUP BY 1, 2
HAVING avg(temp) * custom_sum(device) > 0.8
LIMIT 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: "time", device, (avg(temp)), (((sum(temp) * random()) * (device)::double precision))
-> Custom Scan (AsyncAppend)
Output: "time", device, (avg(temp)), (((sum(temp) * random()) * (device)::double precision))
-> Append
-> HashAggregate
-> Merge Append
Sort Key: hyper."time", hyper.device
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp), ((sum(hyper.temp) * random()) * (hyper.device)::double precision)
Group Key: hyper."time", hyper.device
Filter: ((avg(hyper.temp) * (custom_sum(hyper.device))::double precision) > '0.8'::double precision)
@@ -2027,8 +2028,8 @@ LIMIT 1;
Output: hyper."time", hyper.device, hyper.temp
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
-> HashAggregate
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
-> GroupAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp), ((sum(hyper_1.temp) * random()) * (hyper_1.device)::double precision)
Group Key: hyper_1."time", hyper_1.device
Filter: ((avg(hyper_1.temp) * (custom_sum(hyper_1.device))::double precision) > '0.8'::double precision)
@@ -2036,8 +2037,8 @@ LIMIT 1;
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk, _hyper_1_2_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
(23 rows)
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
(24 rows)
-- not pushed down because of non-shippable expression on the
-- underlying rel
@@ -2086,30 +2087,32 @@ WHERE (hyper.temp * random() <= 20)
AND time < '2018-06-01 00:00'
GROUP BY 1, 2
LIMIT 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: hyper."time", hyper.device, (avg(hyper.temp))
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp)
Group Key: hyper."time", hyper.device
-> Custom Scan (AsyncAppend)
Output: hyper."time", hyper.device, hyper.temp
-> Merge Append
Sort Key: hyper_1."time", hyper_1.device
Output: "time", device, (avg(temp))
-> Custom Scan (AsyncAppend)
Output: "time", device, (avg(temp))
-> Append
-> HashAggregate
Output: hyper."time", hyper.device, avg(hyper.temp)
Group Key: hyper."time", hyper.device
-> Custom Scan (DataNodeScan) on public.hyper
Output: hyper."time", hyper.device, hyper.temp
Filter: ((hyper.temp * random()) <= '20'::double precision)
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
-> HashAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp)
Group Key: hyper_1."time", hyper_1.device
-> Custom Scan (DataNodeScan) on public.hyper hyper_1
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Filter: ((hyper_1.temp * random()) <= '20'::double precision)
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
-> Custom Scan (DataNodeScan) on public.hyper hyper_2
Output: hyper_2."time", hyper_2.device, hyper_2.temp
Filter: ((hyper_2.temp * random()) <= '20'::double precision)
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk, _hyper_1_2_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
(21 rows)
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
(23 rows)
-- contains whitelisted time expressions
SELECT test_override_pushdown_timestamptz('2018-06-01 00:00'::timestamptz);


@@ -842,28 +842,30 @@ FROM hyper
WHERE time > '2018-04-19 00:01'
GROUP BY 1
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate
Output: device, avg(temp)
Group Key: device
-> Custom Scan (AsyncAppend)
Output: device, (PARTIAL avg(temp))
-> Merge Append
Sort Key: hyper.device
-> Custom Scan (DataNodeScan)
Output: hyper.device, (PARTIAL avg(hyper.temp))
Relations: Aggregate on (public.hyper)
Data node: data_node_1
Chunks: _hyper_1_3_dist_chunk, _hyper_1_5_dist_chunk, _hyper_1_7_dist_chunk
Remote SQL: SELECT device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 3, 4]) AND (("time" > '2018-04-19 00:01:00-07'::timestamp with time zone)) GROUP BY 1 ORDER BY device ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: hyper_1.device, (PARTIAL avg(hyper_1.temp))
Relations: Aggregate on (public.hyper)
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk, _hyper_1_6_dist_chunk
Remote SQL: SELECT device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 3]) AND (("time" > '2018-04-19 00:01:00-07'::timestamp with time zone)) GROUP BY 1 ORDER BY device ASC NULLS LAST
(19 rows)
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Output: device, (avg(temp))
Sort Key: device
-> Finalize HashAggregate
Output: device, avg(temp)
Group Key: device
-> Custom Scan (AsyncAppend)
Output: device, (PARTIAL avg(temp))
-> Append
-> Custom Scan (DataNodeScan)
Output: hyper.device, (PARTIAL avg(hyper.temp))
Relations: Aggregate on (public.hyper)
Data node: data_node_1
Chunks: _hyper_1_3_dist_chunk, _hyper_1_5_dist_chunk, _hyper_1_7_dist_chunk
Remote SQL: SELECT device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 3, 4]) AND (("time" > '2018-04-19 00:01:00-07'::timestamp with time zone)) GROUP BY 1
-> Custom Scan (DataNodeScan)
Output: hyper_1.device, (PARTIAL avg(hyper_1.temp))
Relations: Aggregate on (public.hyper)
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk, _hyper_1_6_dist_chunk
Remote SQL: SELECT device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 3]) AND (("time" > '2018-04-19 00:01:00-07'::timestamp with time zone)) GROUP BY 1
(21 rows)
SELECT device, avg(temp)
FROM hyper
@@ -1270,29 +1272,28 @@ FROM hyper
WHERE time BETWEEN '2018-04-19 00:01' AND '2018-06-01 00:00'
GROUP BY 1, 2
ORDER BY 1, 2;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (AsyncAppend)
Output: "time", device, (avg(temp))
-> Merge Append
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp)
Group Key: hyper."time", hyper.device
-> Sort
Output: hyper."time", hyper.device, hyper.temp
Sort Key: hyper."time", hyper.device
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp)
Group Key: hyper."time", hyper.device
-> Custom Scan (DataNodeScan) on public.hyper
Output: hyper."time", hyper.device, hyper.temp
Data node: data_node_1
Chunks: _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
-> GroupAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp)
Group Key: hyper_1."time", hyper_1.device
-> Custom Scan (DataNodeScan) on public.hyper hyper_1
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
(20 rows)
-> Custom Scan (AsyncAppend)
Output: hyper."time", hyper.device, hyper.temp
-> Append
-> Custom Scan (DataNodeScan) on public.hyper hyper_1
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Data node: data_node_1
Chunks: _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone))
-> Custom Scan (DataNodeScan) on public.hyper hyper_2
Output: hyper_2."time", hyper_2.device, hyper_2.temp
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone))
(19 rows)
-- Test HAVING qual
EXPLAIN (VERBOSE, COSTS OFF)
@@ -1344,31 +1345,35 @@ WHERE time BETWEEN '2018-04-19 00:01' AND '2018-06-01 00:00'
GROUP BY 1, 2
HAVING avg(temp) > 4
ORDER BY 1, 2;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (AsyncAppend)
Output: "time", device, (avg(temp))
-> Merge Append
Sort Key: hyper."time", hyper.device
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp)
Group Key: hyper."time", hyper.device
Filter: (avg(hyper.temp) > '4'::double precision)
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Merge Append
Sort Key: hyper."time", hyper.device
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp)
Group Key: hyper."time", hyper.device
Filter: (avg(hyper.temp) > '4'::double precision)
-> Sort
Output: hyper."time", hyper.device, hyper.temp
Sort Key: hyper."time", hyper.device
-> Custom Scan (DataNodeScan) on public.hyper
Output: hyper."time", hyper.device, hyper.temp
Data node: data_node_1
Chunks: _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
-> GroupAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp)
Group Key: hyper_1."time", hyper_1.device
Filter: (avg(hyper_1.temp) > '4'::double precision)
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone))
-> GroupAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp)
Group Key: hyper_1."time", hyper_1.device
Filter: (avg(hyper_1.temp) > '4'::double precision)
-> Sort
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Sort Key: hyper_1."time", hyper_1.device
-> Custom Scan (DataNodeScan) on public.hyper hyper_1
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
(22 rows)
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone))
(26 rows)
SELECT time, device, avg(temp) AS temp
FROM hyper
@@ -1942,30 +1947,31 @@ FROM hyper
WHERE time < '2018-06-01 00:00'
GROUP BY 1, 2
LIMIT 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: "time", device, (avg(temp)), ((random() * (device)::double precision)), (custom_sum(device))
-> Custom Scan (AsyncAppend)
Output: "time", device, (avg(temp)), ((random() * (device)::double precision)), (custom_sum(device))
-> Append
-> HashAggregate
-> Merge Append
Sort Key: hyper."time", hyper.device
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp), (random() * (hyper.device)::double precision), custom_sum(hyper.device)
Group Key: hyper."time", hyper.device
-> Custom Scan (DataNodeScan) on public.hyper
Output: hyper."time", hyper.device, hyper.temp
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
-> HashAggregate
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
-> GroupAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp), (random() * (hyper_1.device)::double precision), custom_sum(hyper_1.device)
Group Key: hyper_1."time", hyper_1.device
-> Custom Scan (DataNodeScan) on public.hyper hyper_1
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk, _hyper_1_2_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
(21 rows)
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
(22 rows)
EXPLAIN (VERBOSE, COSTS OFF)
SELECT time, device, avg(temp), sum(temp) * random() * device as sum_temp
@@ -2010,14 +2016,15 @@ WHERE time < '2018-06-01 00:00'
GROUP BY 1, 2
HAVING avg(temp) * custom_sum(device) > 0.8
LIMIT 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: "time", device, (avg(temp)), (((sum(temp) * random()) * (device)::double precision))
-> Custom Scan (AsyncAppend)
Output: "time", device, (avg(temp)), (((sum(temp) * random()) * (device)::double precision))
-> Append
-> HashAggregate
-> Merge Append
Sort Key: hyper."time", hyper.device
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp), ((sum(hyper.temp) * random()) * (hyper.device)::double precision)
Group Key: hyper."time", hyper.device
Filter: ((avg(hyper.temp) * (custom_sum(hyper.device))::double precision) > '0.8'::double precision)
@@ -2025,8 +2032,8 @@ LIMIT 1;
Output: hyper."time", hyper.device, hyper.temp
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
-> HashAggregate
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
-> GroupAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp), ((sum(hyper_1.temp) * random()) * (hyper_1.device)::double precision)
Group Key: hyper_1."time", hyper_1.device
Filter: ((avg(hyper_1.temp) * (custom_sum(hyper_1.device))::double precision) > '0.8'::double precision)
@@ -2034,8 +2041,8 @@ LIMIT 1;
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk, _hyper_1_2_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
(23 rows)
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
(24 rows)
-- not pushed down because of non-shippable expression on the
-- underlying rel


@@ -48,6 +48,8 @@ INSERT INTO hyper VALUES
('2018-05-20 15:08', 5, 1, 9.4),
('2018-05-30 13:02', 3, 2, 9.0);
ANALYZE hyper;
-- Optimizer debug messages shown at debug level 2
SET client_min_messages TO DEBUG2;


@@ -50,3 +50,6 @@ FROM _timescaledb_catalog.dimension d, _timescaledb_catalog.dimension_slice ds
WHERE num_slices IS NOT NULL
AND d.id = ds.dimension_id
ORDER BY 1, 2, 3;
ANALYZE hyper;
ANALYZE hyper_repart;