Compute basic data node rel stats from chunks

Planning of a data node rel during a distributed query should use the
accumulated stats from the individual chunks that the data node rel
represents. Since a data node rel is not a real base rel (i.e., it
doesn't correspond to an actual relation), it has no statistics in
`pg_catalog` that can be used for planning. Thus, functions such as
`set_baserel_size_estimates` return strange estimates for data node
rels when the planner believes the rel has stats (e.g., after an
`ANALYZE`).

This change fixes the issue by not relying on the planner to compute
rel estimates for data node rels. Instead, the estimates accumulated
from the chunks queried by the data node rel are used directly. This
also obviates the need to compute these stats again.
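
For illustration, here is a minimal standalone sketch of the flow. It
uses stub types only; the real code operates on PostgreSQL's
`RelOptInfo` and TimescaleDB's `DataNodeChunkAssignment`, as the diffs
below show:

```c
#include <stdio.h>

/* Hypothetical stub stand-ins for the planner structs; the real code
 * uses RelOptInfo (per chunk) and DataNodeChunkAssignment (per data
 * node), as in the diffs below. */
typedef struct ChunkRel
{
	unsigned pages;  /* heap pages (cf. RelOptInfo->pages) */
	double rows;     /* estimated rows returned */
	double tuples;   /* raw tuple count */
} ChunkRel;

typedef struct Assignment
{
	unsigned pages;
	double rows;
	double tuples;
} Assignment;

/* Accumulate one chunk's basic stats onto its data node assignment,
 * mirroring data_node_chunk_assignment_assign_chunk(). */
static void
assign_chunk(Assignment *sca, const ChunkRel *chunkrel)
{
	sca->pages += chunkrel->pages;
	sca->rows += chunkrel->rows;
	sca->tuples += chunkrel->tuples;
}

int
main(void)
{
	ChunkRel chunks[] = { { 10, 100.0, 120.0 }, { 20, 200.0, 240.0 } };
	Assignment sca = { 0, 0.0, 0.0 };

	for (int i = 0; i < 2; i++)
		assign_chunk(&sca, &chunks[i]);

	/* The data node rel is then seeded directly from these sums, and
	 * set_baserel_size_estimates() is skipped for it. */
	printf("pages=%u rows=%.0f tuples=%.0f\n",
		   sca.pages, sca.rows, sca.tuples);
	return 0;
}
```

Each data node rel thus ends up with exactly the summed stats of the
chunks assigned to it, so the planner never has to guess.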

Given the new size estimates that this change enables, some plan/test
outputs have changed, and the tests have been updated accordingly.
Author:    Erik Nordström
Committer: Erik Nordström
Date:      2019-10-28 10:13:59 +01:00
Parent:    0050810803
Commit:    af0a75f8fe

11 changed files with 192 additions and 172 deletions


@@ -425,11 +425,7 @@ tsl_debug_append_path(StringInfo buf, PlannerInfo *root, Path *path, int indent)
appendStringInfoString(buf, ")");
}
appendStringInfo(buf,
" rows=%.0f cost=%.2f..%.2f",
path->rows,
path->startup_cost,
path->total_cost);
appendStringInfo(buf, " rows=%.0f", path->rows);
if (path->pathkeys)
appendStringInfoString(buf, " has pathkeys");


@@ -93,6 +93,7 @@ data_node_chunk_assignment_assign_chunk(DataNodeChunkAssignments *scas, RelOptIn
sca->remote_chunk_ids =
lappend_int(sca->remote_chunk_ids,
get_remote_chunk_id_from_relid(chunkrel->serverid, rte->relid));
sca->pages += chunkrel->pages;
sca->rows += chunkrel->rows;
sca->tuples += chunkrel->tuples;


@@ -17,6 +17,7 @@
#include <nodes/relation.h>
#endif
#include <utils/hsearch.h>
#include <storage/block.h>
/*
* data node-chunk assignments map chunks to the data nodes that will be responsible
@@ -29,6 +30,7 @@
typedef struct DataNodeChunkAssignment
{
Oid node_server_oid;
BlockNumber pages;
double rows;
double tuples;
Cost startup_cost;


@@ -435,11 +435,15 @@ data_node_scan_add_node_paths(PlannerInfo *root, RelOptInfo *hyper_rel)
data_node_chunk_assignment_get_or_create(&scas, data_node_rel);
TsFdwRelInfo *fpinfo;
/* Update the number of tuples and rows based on the chunk
* assignments */
/* Basic stats for data node rels come from the assigned chunks since
* data node rels don't correspond to real tables in the system */
data_node_rel->pages = sca->pages;
data_node_rel->tuples = sca->tuples;
data_node_rel->rows = sca->rows;
/* Should also have the same width as any queried chunk */
data_node_rel->reltarget->width = hyper_rel->part_rels[0]->reltarget->width;
fpinfo = fdw_relinfo_create(root,
data_node_rel,
data_node_rel->serverid,


@@ -239,8 +239,14 @@ fdw_relinfo_create(PlannerInfo *root, RelOptInfo *rel, Oid server_oid, Oid local
rel->pages = 10;
rel->tuples = (10 * BLCKSZ) / (rel->reltarget->width + MAXALIGN(SizeofHeapTupleHeader));
}
/* Estimate rel size as best we can with local statistics. */
set_baserel_size_estimates(root, rel);
/* Estimate rel size as best we can with local statistics. There are
* no local statistics for data node rels since they aren't real base
* rels (there's no corresponding table in the system to associate
* stats with). Instead, data node rels already have basic stats set
* at creation time based on data-node-chunk assignment. */
if (fpinfo->type != TS_FDW_RELINFO_HYPERTABLE_DATA_NODE)
set_baserel_size_estimates(root, rel);
/* Fill in basically-bogus cost estimates for use later. */
fdw_estimate_path_cost_size(root,


@@ -61,6 +61,7 @@ INSERT INTO hyper VALUES
('2018-05-19 13:01', 4, 4, 5.1),
('2018-05-20 15:08', 5, 1, 9.4),
('2018-05-30 13:02', 3, 2, 9.0);
ANALYZE hyper;
-- Optimizer debug messages shown at debug level 2
SET client_min_messages TO DEBUG2;
-- Turning on show_rel should show a message
@@ -78,16 +79,16 @@ GROUP BY 1, 2
HAVING avg(temp) > 4
ORDER BY 1, 2;
DEBUG: In tsl_set_rel_pathlist:
RELOPTINFO [BASEREL] (names: hyper): rows=36 width=20
RELOPTINFO [BASEREL] (names: hyper): rows=5 width=20
path list:
Append [parents: hyper] rows=37 cost=100.00..412.58
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..128.09
CustomScan (DataNodeScanPath) [parents: hyper] rows=19 cost=100.00..156.21
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..128.09
MergeAppend [parents: hyper] rows=37 cost=300.02..418.48 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..129.49 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=19 cost=100.00..159.00 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..129.49 has pathkeys
Append [parents: hyper] rows=5
CustomScan (DataNodeScanPath) [parents: hyper] rows=1 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=3 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=1 has pathkeys
MergeAppend [parents: hyper] rows=5 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=1 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=3 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=1 has pathkeys
time | device | temp
@@ -115,32 +116,22 @@ GROUP BY 1, 2
HAVING avg(temp) > 4
ORDER BY 1, 2;
DEBUG: Stage GROUP_AGG in get_foreign_upper_paths:
RELOPTINFO [OTHER_UPPER_REL] (names: hyper): rows=9 width=20
RELOPTINFO [OTHER_UPPER_REL] (names: hyper): rows=1 width=20
path list:
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.09..128.32
Agg [parents: hyper] rows=3 cost=128.18..128.34
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..128.09
Agg [parents: hyper] rows=3 cost=100.00..129.74 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..129.49 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=1 has pathkeys
DEBUG: Stage GROUP_AGG in get_foreign_upper_paths:
RELOPTINFO [OTHER_UPPER_REL] (names: hyper): rows=19 width=20
RELOPTINFO [OTHER_UPPER_REL] (names: hyper): rows=3 width=20
path list:
CustomScan (DataNodeScanPath) [parents: hyper] rows=19 cost=100.19..156.69
Agg [parents: hyper] rows=6 cost=156.61..157.18 has pathkeys
Sort [parents: hyper] rows=19 cost=156.61..156.66 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=19 cost=100.00..156.21
Agg [parents: hyper] rows=1 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=3 has pathkeys
DEBUG: Stage GROUP_AGG in get_foreign_upper_paths:
RELOPTINFO [OTHER_UPPER_REL] (names: hyper): rows=9 width=20
RELOPTINFO [OTHER_UPPER_REL] (names: hyper): rows=1 width=20
path list:
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.09..128.32
Agg [parents: hyper] rows=3 cost=128.18..128.34
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..128.09
Agg [parents: hyper] rows=3 cost=100.00..129.74 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=9 cost=100.00..129.49 has pathkeys
CustomScan (DataNodeScanPath) [parents: hyper] rows=1 has pathkeys
time | device | temp


@@ -119,6 +119,8 @@ ORDER BY 1, 2, 3;
2 | 4 | 1717986916 | 9223372036854775807
(13 rows)
ANALYZE hyper;
ANALYZE hyper_repart;
-- Run the EXPLAINs
SET enable_partitionwise_aggregate = ON;
\ir :TEST_QUERY_NAME
@@ -132,33 +134,34 @@ FROM :TABLE_NAME
WHERE time BETWEEN '2019-01-01' AND '2019-01-03'
GROUP BY 1,2
ORDER BY 1,2;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
GroupAggregate
Output: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device, avg(hyper_repart.temp)
Group Key: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device
-> Sort
Output: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device, hyper_repart.temp
Sort Key: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device
-> Custom Scan (AsyncAppend)
Output: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device, hyper_repart.temp
-> Append
-> Custom Scan (DataNodeScan) on public.hyper_repart hyper_repart_1
Output: time_bucket('@ 2 days'::interval, hyper_repart_1."time"), hyper_repart_1.device, hyper_repart_1.temp
Data node: data_node_1
Chunks: _hyper_2_17_dist_chunk, _hyper_2_16_dist_chunk, _hyper_2_22_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[7, 6, 8]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-03 00:00:00-08'::timestamp with time zone))
-> Custom Scan (DataNodeScan) on public.hyper_repart hyper_repart_2
Output: time_bucket('@ 2 days'::interval, hyper_repart_2."time"), hyper_repart_2.device, hyper_repart_2.temp
Data node: data_node_2
Chunks: _hyper_2_18_dist_chunk, _hyper_2_19_dist_chunk, _hyper_2_23_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[6, 7, 8]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-03 00:00:00-08'::timestamp with time zone))
-> Custom Scan (DataNodeScan) on public.hyper_repart hyper_repart_3
Output: time_bucket('@ 2 days'::interval, hyper_repart_3."time"), hyper_repart_3.device, hyper_repart_3.temp
Data node: data_node_3
Chunks: _hyper_2_21_dist_chunk, _hyper_2_20_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[7, 6]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-03 00:00:00-08'::timestamp with time zone))
(24 rows)
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate
Output: (time_bucket('@ 2 days'::interval, "time")), device, avg(temp)
Group Key: (time_bucket('@ 2 days'::interval, "time")), device
-> Custom Scan (AsyncAppend)
Output: (time_bucket('@ 2 days'::interval, "time")), device, (PARTIAL avg(temp))
-> Merge Append
Sort Key: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device
-> Custom Scan (DataNodeScan)
Output: (time_bucket('@ 2 days'::interval, hyper_repart."time")), hyper_repart.device, (PARTIAL avg(hyper_repart.temp))
Relations: Aggregate on (public.hyper_repart)
Data node: data_node_1
Chunks: _hyper_2_17_dist_chunk, _hyper_2_16_dist_chunk, _hyper_2_22_dist_chunk
Remote SQL: SELECT public.time_bucket('@ 2 days'::interval, "time"), device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[7, 6, 8]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-03 00:00:00-08'::timestamp with time zone)) GROUP BY 1, 2 ORDER BY public.time_bucket('2 days'::interval, "time") ASC NULLS LAST, device ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: (time_bucket('@ 2 days'::interval, hyper_repart_1."time")), hyper_repart_1.device, (PARTIAL avg(hyper_repart_1.temp))
Relations: Aggregate on (public.hyper_repart)
Data node: data_node_2
Chunks: _hyper_2_18_dist_chunk, _hyper_2_19_dist_chunk, _hyper_2_23_dist_chunk
Remote SQL: SELECT public.time_bucket('@ 2 days'::interval, "time"), device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[6, 7, 8]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-03 00:00:00-08'::timestamp with time zone)) GROUP BY 1, 2 ORDER BY public.time_bucket('2 days'::interval, "time") ASC NULLS LAST, device ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: (time_bucket('@ 2 days'::interval, hyper_repart_2."time")), hyper_repart_2.device, (PARTIAL avg(hyper_repart_2.temp))
Relations: Aggregate on (public.hyper_repart)
Data node: data_node_3
Chunks: _hyper_2_21_dist_chunk, _hyper_2_20_dist_chunk
Remote SQL: SELECT public.time_bucket('@ 2 days'::interval, "time"), device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[7, 6]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-03 00:00:00-08'::timestamp with time zone)) GROUP BY 1, 2 ORDER BY public.time_bucket('2 days'::interval, "time") ASC NULLS LAST, device ASC NULLS LAST
(25 rows)
-- Query doesn't cover repartitioning boundary, so safe to push down
-- bucketing
@@ -186,13 +189,15 @@ ORDER BY 1,2;
Data node: data_node_2
Chunks: _hyper_2_18_dist_chunk
Remote SQL: SELECT public.time_bucket('@ 2 days'::interval, "time"), device, avg(temp) FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[6]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-01 15:00:00-08'::timestamp with time zone)) GROUP BY 1, 2 ORDER BY public.time_bucket('2 days'::interval, "time") ASC NULLS LAST, device ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: (time_bucket('@ 2 days'::interval, hyper_repart_2."time")), hyper_repart_2.device, (avg(hyper_repart_2.temp))
Relations: Aggregate on (public.hyper_repart)
Data node: data_node_3
Chunks: _hyper_2_21_dist_chunk
Remote SQL: SELECT public.time_bucket('@ 2 days'::interval, "time"), device, avg(temp) FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[7]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-01 15:00:00-08'::timestamp with time zone)) GROUP BY 1, 2 ORDER BY public.time_bucket('2 days'::interval, "time") ASC NULLS LAST, device ASC NULLS LAST
(22 rows)
-> GroupAggregate
Output: (time_bucket('@ 2 days'::interval, hyper_repart_2."time")), hyper_repart_2.device, avg(hyper_repart_2.temp)
Group Key: time_bucket('@ 2 days'::interval, hyper_repart_2."time"), hyper_repart_2.device
-> Custom Scan (DataNodeScan) on public.hyper_repart hyper_repart_2
Output: time_bucket('@ 2 days'::interval, hyper_repart_2."time"), hyper_repart_2.device, hyper_repart_2.temp
Data node: data_node_3
Chunks: _hyper_2_21_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper_repart WHERE _timescaledb_internal.chunks_in(hyper_repart, ARRAY[7]) AND (("time" >= '2019-01-01 00:00:00-08'::timestamp with time zone)) AND (("time" <= '2019-01-01 15:00:00-08'::timestamp with time zone)) ORDER BY public.time_bucket('2 days'::interval, "time") ASC NULLS LAST, device ASC NULLS LAST
(24 rows)
-- Run the queries for each setting. Each setting's result is
-- generated into its own file


@@ -2012,14 +2012,15 @@ WHERE time < '2018-06-01 00:00'
GROUP BY 1, 2
HAVING avg(temp) * custom_sum(device) > 0.8
LIMIT 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: "time", device, (avg(temp)), (((sum(temp) * random()) * (device)::double precision))
-> Custom Scan (AsyncAppend)
Output: "time", device, (avg(temp)), (((sum(temp) * random()) * (device)::double precision))
-> Append
-> HashAggregate
-> Merge Append
Sort Key: hyper."time", hyper.device
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp), ((sum(hyper.temp) * random()) * (hyper.device)::double precision)
Group Key: hyper."time", hyper.device
Filter: ((avg(hyper.temp) * (custom_sum(hyper.device))::double precision) > '0.8'::double precision)
@@ -2027,8 +2028,8 @@ LIMIT 1;
Output: hyper."time", hyper.device, hyper.temp
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
-> HashAggregate
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
-> GroupAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp), ((sum(hyper_1.temp) * random()) * (hyper_1.device)::double precision)
Group Key: hyper_1."time", hyper_1.device
Filter: ((avg(hyper_1.temp) * (custom_sum(hyper_1.device))::double precision) > '0.8'::double precision)
@@ -2036,8 +2037,8 @@ LIMIT 1;
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk, _hyper_1_2_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
(23 rows)
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
(24 rows)
-- not pushed down because of non-shippable expression on the
-- underlying rel
@@ -2086,30 +2087,32 @@ WHERE (hyper.temp * random() <= 20)
AND time < '2018-06-01 00:00'
GROUP BY 1, 2
LIMIT 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: hyper."time", hyper.device, (avg(hyper.temp))
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp)
Group Key: hyper."time", hyper.device
-> Custom Scan (AsyncAppend)
Output: hyper."time", hyper.device, hyper.temp
-> Merge Append
Sort Key: hyper_1."time", hyper_1.device
Output: "time", device, (avg(temp))
-> Custom Scan (AsyncAppend)
Output: "time", device, (avg(temp))
-> Append
-> HashAggregate
Output: hyper."time", hyper.device, avg(hyper.temp)
Group Key: hyper."time", hyper.device
-> Custom Scan (DataNodeScan) on public.hyper
Output: hyper."time", hyper.device, hyper.temp
Filter: ((hyper.temp * random()) <= '20'::double precision)
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
-> HashAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp)
Group Key: hyper_1."time", hyper_1.device
-> Custom Scan (DataNodeScan) on public.hyper hyper_1
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Filter: ((hyper_1.temp * random()) <= '20'::double precision)
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
-> Custom Scan (DataNodeScan) on public.hyper hyper_2
Output: hyper_2."time", hyper_2.device, hyper_2.temp
Filter: ((hyper_2.temp * random()) <= '20'::double precision)
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk, _hyper_1_2_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
(21 rows)
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
(23 rows)
-- contains whitelisted time expressions
SELECT test_override_pushdown_timestamptz('2018-06-01 00:00'::timestamptz);


@@ -842,28 +842,30 @@ FROM hyper
WHERE time > '2018-04-19 00:01'
GROUP BY 1
ORDER BY 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate
Output: device, avg(temp)
Group Key: device
-> Custom Scan (AsyncAppend)
Output: device, (PARTIAL avg(temp))
-> Merge Append
Sort Key: hyper.device
-> Custom Scan (DataNodeScan)
Output: hyper.device, (PARTIAL avg(hyper.temp))
Relations: Aggregate on (public.hyper)
Data node: data_node_1
Chunks: _hyper_1_3_dist_chunk, _hyper_1_5_dist_chunk, _hyper_1_7_dist_chunk
Remote SQL: SELECT device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 3, 4]) AND (("time" > '2018-04-19 00:01:00-07'::timestamp with time zone)) GROUP BY 1 ORDER BY device ASC NULLS LAST
-> Custom Scan (DataNodeScan)
Output: hyper_1.device, (PARTIAL avg(hyper_1.temp))
Relations: Aggregate on (public.hyper)
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk, _hyper_1_6_dist_chunk
Remote SQL: SELECT device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 3]) AND (("time" > '2018-04-19 00:01:00-07'::timestamp with time zone)) GROUP BY 1 ORDER BY device ASC NULLS LAST
(19 rows)
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Output: device, (avg(temp))
Sort Key: device
-> Finalize HashAggregate
Output: device, avg(temp)
Group Key: device
-> Custom Scan (AsyncAppend)
Output: device, (PARTIAL avg(temp))
-> Append
-> Custom Scan (DataNodeScan)
Output: hyper.device, (PARTIAL avg(hyper.temp))
Relations: Aggregate on (public.hyper)
Data node: data_node_1
Chunks: _hyper_1_3_dist_chunk, _hyper_1_5_dist_chunk, _hyper_1_7_dist_chunk
Remote SQL: SELECT device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 3, 4]) AND (("time" > '2018-04-19 00:01:00-07'::timestamp with time zone)) GROUP BY 1
-> Custom Scan (DataNodeScan)
Output: hyper_1.device, (PARTIAL avg(hyper_1.temp))
Relations: Aggregate on (public.hyper)
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk, _hyper_1_6_dist_chunk
Remote SQL: SELECT device, _timescaledb_internal.partialize_agg(avg(temp)) FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 3]) AND (("time" > '2018-04-19 00:01:00-07'::timestamp with time zone)) GROUP BY 1
(21 rows)
SELECT device, avg(temp)
FROM hyper
@@ -1270,29 +1272,28 @@ FROM hyper
WHERE time BETWEEN '2018-04-19 00:01' AND '2018-06-01 00:00'
GROUP BY 1, 2
ORDER BY 1, 2;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (AsyncAppend)
Output: "time", device, (avg(temp))
-> Merge Append
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp)
Group Key: hyper."time", hyper.device
-> Sort
Output: hyper."time", hyper.device, hyper.temp
Sort Key: hyper."time", hyper.device
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp)
Group Key: hyper."time", hyper.device
-> Custom Scan (DataNodeScan) on public.hyper
Output: hyper."time", hyper.device, hyper.temp
Data node: data_node_1
Chunks: _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
-> GroupAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp)
Group Key: hyper_1."time", hyper_1.device
-> Custom Scan (DataNodeScan) on public.hyper hyper_1
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
(20 rows)
-> Custom Scan (AsyncAppend)
Output: hyper."time", hyper.device, hyper.temp
-> Append
-> Custom Scan (DataNodeScan) on public.hyper hyper_1
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Data node: data_node_1
Chunks: _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone))
-> Custom Scan (DataNodeScan) on public.hyper hyper_2
Output: hyper_2."time", hyper_2.device, hyper_2.temp
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone))
(19 rows)
-- Test HAVING qual
EXPLAIN (VERBOSE, COSTS OFF)
@@ -1344,31 +1345,35 @@ WHERE time BETWEEN '2018-04-19 00:01' AND '2018-06-01 00:00'
GROUP BY 1, 2
HAVING avg(temp) > 4
ORDER BY 1, 2;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (AsyncAppend)
Output: "time", device, (avg(temp))
-> Merge Append
Sort Key: hyper."time", hyper.device
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp)
Group Key: hyper."time", hyper.device
Filter: (avg(hyper.temp) > '4'::double precision)
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Merge Append
Sort Key: hyper."time", hyper.device
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp)
Group Key: hyper."time", hyper.device
Filter: (avg(hyper.temp) > '4'::double precision)
-> Sort
Output: hyper."time", hyper.device, hyper.temp
Sort Key: hyper."time", hyper.device
-> Custom Scan (DataNodeScan) on public.hyper
Output: hyper."time", hyper.device, hyper.temp
Data node: data_node_1
Chunks: _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
-> GroupAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp)
Group Key: hyper_1."time", hyper_1.device
Filter: (avg(hyper_1.temp) > '4'::double precision)
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone))
-> GroupAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp)
Group Key: hyper_1."time", hyper_1.device
Filter: (avg(hyper_1.temp) > '4'::double precision)
-> Sort
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Sort Key: hyper_1."time", hyper_1.device
-> Custom Scan (DataNodeScan) on public.hyper hyper_1
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
(22 rows)
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2]) AND (("time" >= '2018-04-19 00:01:00-07'::timestamp with time zone)) AND (("time" <= '2018-06-01 00:00:00-07'::timestamp with time zone))
(26 rows)
SELECT time, device, avg(temp) AS temp
FROM hyper
@@ -1942,30 +1947,31 @@ FROM hyper
WHERE time < '2018-06-01 00:00'
GROUP BY 1, 2
LIMIT 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: "time", device, (avg(temp)), ((random() * (device)::double precision)), (custom_sum(device))
-> Custom Scan (AsyncAppend)
Output: "time", device, (avg(temp)), ((random() * (device)::double precision)), (custom_sum(device))
-> Append
-> HashAggregate
-> Merge Append
Sort Key: hyper."time", hyper.device
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp), (random() * (hyper.device)::double precision), custom_sum(hyper.device)
Group Key: hyper."time", hyper.device
-> Custom Scan (DataNodeScan) on public.hyper
Output: hyper."time", hyper.device, hyper.temp
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
-> HashAggregate
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
-> GroupAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp), (random() * (hyper_1.device)::double precision), custom_sum(hyper_1.device)
Group Key: hyper_1."time", hyper_1.device
-> Custom Scan (DataNodeScan) on public.hyper hyper_1
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk, _hyper_1_2_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
(21 rows)
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
(22 rows)
EXPLAIN (VERBOSE, COSTS OFF)
SELECT time, device, avg(temp), sum(temp) * random() * device as sum_temp
@@ -2010,14 +2016,15 @@ WHERE time < '2018-06-01 00:00'
GROUP BY 1, 2
HAVING avg(temp) * custom_sum(device) > 0.8
LIMIT 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: "time", device, (avg(temp)), (((sum(temp) * random()) * (device)::double precision))
-> Custom Scan (AsyncAppend)
Output: "time", device, (avg(temp)), (((sum(temp) * random()) * (device)::double precision))
-> Append
-> HashAggregate
-> Merge Append
Sort Key: hyper."time", hyper.device
-> GroupAggregate
Output: hyper."time", hyper.device, avg(hyper.temp), ((sum(hyper.temp) * random()) * (hyper.device)::double precision)
Group Key: hyper."time", hyper.device
Filter: ((avg(hyper.temp) * (custom_sum(hyper.device))::double precision) > '0.8'::double precision)
@@ -2025,8 +2032,8 @@ LIMIT 1;
Output: hyper."time", hyper.device, hyper.temp
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_3_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
-> HashAggregate
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[1, 2]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
-> GroupAggregate
Output: hyper_1."time", hyper_1.device, avg(hyper_1.temp), ((sum(hyper_1.temp) * random()) * (hyper_1.device)::double precision)
Group Key: hyper_1."time", hyper_1.device
Filter: ((avg(hyper_1.temp) * (custom_sum(hyper_1.device))::double precision) > '0.8'::double precision)
@@ -2034,8 +2041,8 @@ LIMIT 1;
Output: hyper_1."time", hyper_1.device, hyper_1.temp
Data node: data_node_2
Chunks: _hyper_1_4_dist_chunk, _hyper_1_2_dist_chunk
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone))
(23 rows)
Remote SQL: SELECT "time", device, temp FROM public.hyper WHERE _timescaledb_internal.chunks_in(hyper, ARRAY[2, 1]) AND (("time" < '2018-06-01 00:00:00-07'::timestamp with time zone)) ORDER BY "time" ASC NULLS LAST, device ASC NULLS LAST
(24 rows)
-- not pushed down because of non-shippable expression on the
-- underlying rel


@@ -48,6 +48,8 @@ INSERT INTO hyper VALUES
('2018-05-20 15:08', 5, 1, 9.4),
('2018-05-30 13:02', 3, 2, 9.0);
ANALYZE hyper;
-- Optimizer debug messages shown at debug level 2
SET client_min_messages TO DEBUG2;


@@ -50,3 +50,6 @@ FROM _timescaledb_catalog.dimension d, _timescaledb_catalog.dimension_slice ds
WHERE num_slices IS NOT NULL
AND d.id = ds.dimension_id
ORDER BY 1, 2, 3;
ANALYZE hyper;
ANALYZE hyper_repart;