Improve relation size estimate

When there are no stats (number of tuples/pages), we can use two
approaches to estimate the relation size: interpolate the relation
size using stats from previous chunks (if they exist), or estimate
it using the shared buffer size (the shared buffer size should
align with the chunk size).
Authored by niksa on 2019-11-06 22:24:34 +01:00; committed by Erik Nordström
parent 29ce1510a5
commit c60cabd768
19 changed files with 1872 additions and 799 deletions
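As a rough, self-contained sketch of the shared-buffer-based fallback described in the commit message (this is not code from this commit): BLOCK_SIZE and TUPLE_HEADER_SIZE are assumed stand-ins for PostgreSQL's BLCKSZ and MAXALIGN(SizeofHeapTupleHeader), and the chunk target size is passed in directly rather than derived from shared buffers as the real code does.

#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE 8192      /* assumed stand-in for PostgreSQL's BLCKSZ */
#define TUPLE_HEADER_SIZE 24 /* assumed stand-in for MAXALIGN(SizeofHeapTupleHeader) */

typedef struct
{
	double tuples;
	unsigned int pages;
} RelEstimatesSketch;

/*
 * Fallback when no previous-chunk stats exist: assume a chunk roughly fills
 * the target size (derived from shared buffers in the real code), divide it
 * among the closed-dimension (space) slices, then derive tuple and page
 * counts from the estimated tuple width.
 */
static RelEstimatesSketch
estimate_without_stats(int64_t chunk_target_size, int result_width, int total_closed_slices)
{
	RelEstimatesSketch est;

	if (total_closed_slices > 0)
		chunk_target_size /= total_closed_slices;

	est.tuples = (double) chunk_target_size / (result_width + TUPLE_HEADER_SIZE);
	est.pages = (unsigned int) (chunk_target_size / BLOCK_SIZE);
	return est;
}

int
main(void)
{
	/* e.g., a 128 MB target size, 20-byte rows, 3 space partitions */
	RelEstimatesSketch est = estimate_without_stats((int64_t) 128 * 1024 * 1024, 20, 3);

	printf("tuples=%.0f pages=%u\n", est.tuples, est.pages);
	return 0;
}

With these example inputs the sketch yields roughly one million tuples and about 5,400 pages per chunk, the same order of magnitude as the pre-ANALYZE row estimates in the test output further down.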


@ -2299,6 +2299,36 @@ ts_chunk_get_by_id(int32 id, bool fail_if_not_found)
return chunk_scan_find(CHUNK_ID_INDEX, scankey, 1, CurrentMemoryContext, fail_if_not_found);
}
/*
 * Number of chunks created after the given chunk.
 * If chunk2.id > chunk1.id, then chunk2 was created after chunk1.
 */
int
ts_chunk_num_of_chunks_created_after(const Chunk *chunk)
{
ScanKeyData scankey[1];
/*
 * Try to find chunks with a greater id than the given chunk
 */
ScanKeyInit(&scankey[0],
Anum_chunk_idx_id,
BTGreaterStrategyNumber,
F_INT4GT,
Int32GetDatum(chunk->fd.id));
return chunk_scan_internal(CHUNK_ID_INDEX,
scankey,
1,
NULL,
NULL,
NULL,
0,
ForwardScanDirection,
AccessShareLock,
CurrentMemoryContext);
}
/*
* Simple scans provide lightweight ways to access chunk information without the
* overhead of getting a full chunk (i.e., no extra metadata, like constraints,


@ -136,6 +136,7 @@ extern Oid ts_chunk_get_schema_id(int32 chunk_id, bool missing_ok);
extern bool ts_chunk_get_id(const char *schema, const char *table, int32 *chunk_id,
bool missing_ok);
extern bool ts_chunk_exists_relid(Oid relid);
extern TSDLLEXPORT int ts_chunk_num_of_chunks_created_after(const Chunk *chunk);
extern TSDLLEXPORT bool ts_chunk_exists_with_compression(int32 hypertable_id);
extern void ts_chunk_recreate_all_constraints_for_dimension(Hyperspace *hs, int32 dimension_id);
extern TSDLLEXPORT void ts_chunk_drop_fks(Chunk *const chunk);
@ -144,7 +145,8 @@ extern int ts_chunk_delete_by_hypertable_id(int32 hypertable_id);
extern int ts_chunk_delete_by_name(const char *schema, const char *table, DropBehavior behavior);
extern bool ts_chunk_set_name(Chunk *chunk, const char *newname);
extern bool ts_chunk_set_schema(Chunk *chunk, const char *newschema);
extern List *ts_chunk_get_window(int32 dimension_id, int64 point, int count, MemoryContext mctx);
extern TSDLLEXPORT List *ts_chunk_get_window(int32 dimension_id, int64 point, int count,
MemoryContext mctx);
extern void ts_chunks_rename_schema_name(char *old_schema, char *new_schema);
extern TSDLLEXPORT bool ts_chunk_set_compressed_chunk(Chunk *chunk, int32 compressed_chunk_id,
bool isnull);


@ -114,8 +114,8 @@ get_memory_cache_size(void)
*/
#define DEFAULT_CACHE_MEMORY_SLACK 0.9
static inline int64
calculate_initial_chunk_target_size(void)
extern inline int64
ts_chunk_calculate_initial_chunk_target_size(void)
{
return (int64)((double) get_memory_cache_size() * DEFAULT_CACHE_MEMORY_SLACK);
}
@ -662,7 +662,7 @@ chunk_target_size_in_bytes(const text *target_size_text)
return 0;
if (pg_strcasecmp(target_size, "estimate") == 0)
target_size_bytes = calculate_initial_chunk_target_size();
target_size_bytes = ts_chunk_calculate_initial_chunk_target_size();
else
target_size_bytes = convert_text_memory_amount_to_bytes(target_size);


@ -27,4 +27,6 @@ extern void ts_chunk_adaptive_sizing_info_validate(ChunkSizingInfo *info);
extern void ts_chunk_sizing_func_validate(regproc func, ChunkSizingInfo *info);
extern TSDLLEXPORT ChunkSizingInfo *ts_chunk_sizing_info_get_default_disabled(Oid table_relid);
extern TSDLLEXPORT int64 ts_chunk_calculate_initial_chunk_target_size(void);
#endif /* TIMESCALEDB_CHUNK_ADAPTIVE_H */


@ -279,3 +279,6 @@ $BODY$
BEGIN
END
$BODY$;
CREATE OR REPLACE FUNCTION ts_test_override_current_timestamptz(new_value TIMESTAMPTZ)
RETURNS VOID AS :TSL_MODULE_PATHNAME, 'ts_test_override_current_timestamptz' LANGUAGE C VOLATILE STRICT;


@ -103,7 +103,7 @@ get_upper_rel_estimate(PlannerInfo *root, RelOptInfo *rel, CostEstimate *ce)
ce->startup_cost = ofpinfo->rel_startup_cost;
ce->startup_cost += aggcosts.transCost.startup;
ce->startup_cost += aggcosts.transCost.per_tuple * input_rows;
ce->startup_cost += (cpu_operator_cost * num_group_cols) * input_rows;
ce->startup_cost += cpu_operator_cost * num_group_cols * input_rows;
ce->startup_cost += ptarget->cost.startup;
/*-----


@ -12,6 +12,7 @@
#include <utils/hsearch.h>
#include <utils/builtins.h>
#include <utils/lsyscache.h>
#include <utils/syscache.h>
#include <miscadmin.h>
#include <extension_constants.h>
@ -22,6 +23,15 @@
#include "deparse.h"
#include "relinfo.h"
#include "estimate.h"
#include "chunk_adaptive.h"
#include "cache.h"
#include "hypertable.h"
#include "hypertable_cache.h"
#include "dimension.h"
#include "chunk.h"
#include "hypercube.h"
#include "errors.h"
#include "scan_exec.h"
/* Default CPU cost to start up a foreign query. */
#define DEFAULT_FDW_STARTUP_COST 100.0
@ -31,6 +41,8 @@
#define DEFAULT_FDW_FETCH_SIZE 10000
#define DEFAULT_CHUNK_LOOKBACK_WINDOW 10
/*
* Parse options from foreign server and apply them to fpinfo.
*
@ -97,6 +109,257 @@ get_relation_qualified_name(Oid relid)
return name->data;
}
static const double FILL_FACTOR_CURRENT_CHUNK = 0.5;
static const double FILL_FACTOR_HISTORICAL_CHUNK = 1;
static DimensionSlice *
get_chunk_time_slice(Chunk *chunk, Hyperspace *space)
{
int32 time_dim_id = hyperspace_get_open_dimension(space, 0)->fd.id;
return ts_hypercube_get_slice_by_dimension_id(chunk->cube, time_dim_id);
}
/*
 * Sum of the number of slices in all closed (space) dimensions
 */
static int
get_total_number_of_slices(Hyperspace *space)
{
int dim_idx;
int total_slices = 0;
for (dim_idx = 0; dim_idx < space->num_dimensions; dim_idx++)
{
Dimension *dim = &space->dimensions[dim_idx];
if (IS_CLOSED_DIMENSION(dim))
total_slices += dim->fd.num_slices;
}
return total_slices;
}
/*
 * Fillfactor values are between 0 and 1 and indicate how much data is in the chunk.
 *
 * The two major drivers of the estimate are the current time and the number of chunks created
 * after the given chunk.
 *
 * The fillfactor estimate assumes that written data is 'recent' with respect to the time
 * dimension (e.g., almost real-time). When writing historical data, the estimate may be further
 * off, since we assume that historical chunks have a fillfactor of 1 unless the number of chunks
 * created afterwards is smaller than the total number of slices. Even when writing historical
 * data we might not be far off, since the data most likely has monotonically increasing time.
 *
 * The estimate handles two possible hypertable configurations: 1. the time dimension is of
 * timestamp type, 2. the time dimension is of integer type. If the hypertable uses a timestamp
 * type to partition data, there are three possible scenarios: we are beyond the chunk end time
 * (historical chunk), we are somewhere in between the chunk time boundaries (current chunk), or
 * the chunk start time is in the future (highly unlikely). For integer types we assume that all
 * chunks except the current one have a fillfactor of 1.
 *
 * To explain how the number of chunks created after a chunk affects the estimate, imagine a
 * table that is space partitioned with one dimension having 3 partitions. If data is equally
 * distributed among the partitions, there will be 3 current chunks. If there are two new chunks
 * created after chunk X, then chunk X is still considered a current chunk.
 */
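/*
 * Illustrative example (not from the original code): a chunk covering
 * [Jan 1, Jan 8) with "now" at Jan 3 yields elapsed / interval = 2 / 7 ~= 0.29.
 * A chunk whose end time is already in the past is assumed full (fillfactor 1),
 * unless fewer chunks were created after it than there are closed-dimension
 * slices, in which case it is still treated as a current chunk (fillfactor 0.5).
 */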
static double
estimate_chunk_fillfactor(Chunk *chunk, Hyperspace *space)
{
Dimension *time_dim = hyperspace_get_open_dimension(space, 0);
DimensionSlice *time_slice = get_chunk_time_slice(chunk, space);
Oid time_dim_type = ts_dimension_get_partition_type(time_dim);
int num_created_after = ts_chunk_num_of_chunks_created_after(chunk);
int total_slices = get_total_number_of_slices(space);
if (IS_TIMESTAMP_TYPE(time_dim_type))
{
TimestampTz now = GetSQLCurrentTimestamp(-1);
int64 now_internal_time;
#ifdef TS_DEBUG
if (ts_current_timestamp_override_value >= 0)
now = ts_current_timestamp_override_value;
#endif
now_internal_time = ts_time_value_to_internal(TimestampTzGetDatum(now), TIMESTAMPTZOID);
/* if we are beyond the end of the range then the chunk can possibly be completely filled */
if (time_slice->fd.range_end <= now_internal_time)
{
/* If there are fewer newly created chunks than the number of slices, then this is a current
 * chunk. This also works better when writing historical data */
return num_created_after < total_slices ? FILL_FACTOR_CURRENT_CHUNK :
FILL_FACTOR_HISTORICAL_CHUNK;
}
/* for chunks in the future (highly unlikely) we assume the same as for the `current` chunk */
if (time_slice->fd.range_start >= now_internal_time)
return FILL_FACTOR_CURRENT_CHUNK;
/* current time falls within chunk time constraints */
double elapsed = (now_internal_time - time_slice->fd.range_start);
double interval = (time_slice->fd.range_end - time_slice->fd.range_start);
Assert(interval != 0);
return elapsed / interval;
}
else
{
/* if current chunk is the last created we assume it has 0.5 fill factor */
return num_created_after < total_slices ? FILL_FACTOR_CURRENT_CHUNK :
FILL_FACTOR_HISTORICAL_CHUNK;
}
}
typedef struct RelEstimates
{
double tuples;
BlockNumber pages;
} RelEstimates;
/*
 * The idea is to look at the number of tuples and pages of the N previous chunks
 * and calculate an average. Ideally we could add weights to this calculation
 * and give more importance to newer chunks, but a ballpark estimate should be
 * just fine.
 */
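/*
 * Illustrative example (not from the original code): if the previous chunks
 * report reltuples of 120, 0 and 180, the zero entry is skipped and the tuple
 * estimate becomes (120 + 180) / 2 = 150; relpages are averaged the same way.
 */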
static RelEstimates *
estimate_tuples_and_pages_using_prev_chunks(PlannerInfo *root, Hyperspace *space,
Chunk *current_chunk)
{
RelEstimates *estimates = palloc0(sizeof(RelEstimates));
ListCell *lc;
float4 total_tuples = 0;
int32 total_pages = 0;
int non_zero_reltuples_cnt = 0;
int non_zero_relpages_cnt = 0;
DimensionSlice *time_slice = get_chunk_time_slice(current_chunk, space);
List *prev_chunks = ts_chunk_get_window(time_slice->fd.dimension_id,
time_slice->fd.range_start,
DEFAULT_CHUNK_LOOKBACK_WINDOW,
CurrentMemoryContext);
foreach (lc, prev_chunks)
{
Chunk *pc = lfirst(lc);
HeapTuple rel_tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(pc->table_id));
Form_pg_class rel_form;
if (!HeapTupleIsValid(rel_tuple))
ereport(ERROR,
(errcode(ERRCODE_TS_INTERNAL_ERROR),
errmsg("cache lookup failed for chunk relation %u", pc->table_id),
errdetail("Failed to estimate number of tuples and pages for chunk %d.",
pc->fd.id)));
rel_form = (Form_pg_class) GETSTRUCT(rel_tuple);
if (rel_form->reltuples > 0)
{
total_tuples += rel_form->reltuples;
non_zero_reltuples_cnt++;
}
if (rel_form->relpages > 0)
{
total_pages += rel_form->relpages;
non_zero_relpages_cnt++;
}
ReleaseSysCache(rel_tuple);
}
if (non_zero_reltuples_cnt > 0)
estimates->tuples = total_tuples / non_zero_reltuples_cnt;
if (non_zero_relpages_cnt > 0)
estimates->pages = total_pages / non_zero_relpages_cnt;
return estimates;
}
static RelEstimates *
estimate_tuples_and_pages_using_shared_buffers(PlannerInfo *root, Hypertable *ht, int result_width)
{
RelEstimates *estimates = palloc(sizeof(RelEstimates));
int64 chunk_size_estimate = ts_chunk_calculate_initial_chunk_target_size();
if (ht != NULL)
{
int total_slices = get_total_number_of_slices(ht->space);
if (total_slices > 0)
chunk_size_estimate /= total_slices;
}
else
/* half-size seems to be the safest bet */
chunk_size_estimate /= 2;
estimates->tuples = chunk_size_estimate / (result_width + MAXALIGN(SizeofHeapTupleHeader));
estimates->pages = chunk_size_estimate / BLCKSZ;
return estimates;
}
static void
set_rel_estimates(RelOptInfo *rel, RelEstimates *estimates)
{
rel->tuples = estimates->tuples;
rel->pages = estimates->pages;
}
static void
rel_estimates_apply_fillfactor(RelEstimates *estimates, double fillfactor)
{
estimates->pages *= fillfactor;
estimates->tuples *= fillfactor;
}
/*
 * When there are no local stats, we try estimating by either using stats from previous chunks (if
 * they exist) or the shared buffers size.
 */
static void
estimate_tuples_and_pages(PlannerInfo *root, RelOptInfo *rel)
{
int parent_relid;
RangeTblEntry *hyper_rte;
Cache *hcache;
Hypertable *ht;
double chunk_fillfactor;
RangeTblEntry *chunk_rte;
Chunk *chunk;
Hyperspace *hyperspace;
RelEstimates *estimates;
Assert(rel->tuples == 0);
Assert(rel->pages == 0);
/* In some cases (e.g., UPDATE stmt) top_parent_relids is not set, so the best
we can do is use the shared buffers size without partitioning information.
Since updates are not something we generally optimize for, this should be fine. */
if (rel->top_parent_relids == NULL)
{
estimates =
estimate_tuples_and_pages_using_shared_buffers(root, NULL, rel->reltarget->width);
set_rel_estimates(rel, estimates);
return;
}
parent_relid = bms_next_member(rel->top_parent_relids, -1);
hyper_rte = planner_rt_fetch(parent_relid, root);
hcache = ts_hypertable_cache_pin();
ht = ts_hypertable_cache_get_entry(hcache, hyper_rte->relid, CACHE_FLAG_NONE);
hyperspace = ht->space;
chunk_rte = planner_rt_fetch(rel->relid, root);
chunk = ts_chunk_get_by_relid(chunk_rte->relid, true);
/* First try to figure out the number of tuples/pages using stats from previous chunks,
otherwise make an estimate based on the shared buffers size */
estimates = estimate_tuples_and_pages_using_prev_chunks(root, hyperspace, chunk);
if (estimates->tuples == 0 || estimates->pages == 0)
estimates = estimate_tuples_and_pages_using_shared_buffers(root, ht, rel->reltarget->width);
chunk_fillfactor = estimate_chunk_fillfactor(chunk, hyperspace);
/* adjust tuples/pages using chunk_fillfactor */
rel_estimates_apply_fillfactor(estimates, chunk_fillfactor);
set_rel_estimates(rel, estimates);
ts_cache_release(hcache);
}
TsFdwRelInfo *
fdw_relinfo_create(PlannerInfo *root, RelOptInfo *rel, Oid server_oid, Oid local_table_id,
@ -199,17 +462,10 @@ fdw_relinfo_create(PlannerInfo *root, RelOptInfo *rel, Oid server_oid, Oid local
/*
* If the foreign table has never been ANALYZEd, it will have relpages
* and reltuples equal to zero, which most likely has nothing to do
* with reality. We can't do a whole lot about that if we're not
* allowed to consult the data node, but we can use a hack similar
* to plancat.c's treatment of empty relations: use a minimum size
* estimate of 10 pages, and divide by the column-datatype-based width
* estimate to get the corresponding number of tuples.
with reality. The best we can do is estimate the number of tuples/pages.
*/
if (rel->pages == 0 && rel->tuples == 0)
{
rel->pages = 10;
rel->tuples = (10 * BLCKSZ) / (rel->reltarget->width + MAXALIGN(SizeofHeapTupleHeader));
}
if (rel->pages == 0 && rel->tuples == 0 && type == TS_FDW_RELINFO_FOREIGN_TABLE)
estimate_tuples_and_pages(root, rel);
/* Estimate rel size as best we can with local statistics. There are
* no local statistics for data node rels since they aren't real base


@ -191,7 +191,7 @@ prepare_query_params(PlanState *node, List *fdw_exprs, int num_params, FmgrInfo
TimestampTz ts_current_timestamp_override_value = -1;
extern void
fdw_scan_debug_override_pushdown_timestamp(TimestampTz time)
fdw_scan_debug_override_current_timestamp(TimestampTz time)
{
ts_current_timestamp_override_value = time;
}


@ -51,8 +51,10 @@ extern void fdw_scan_explain(ScanState *ss, List *fdw_private, ExplainState *es,
extern DataFetcher *create_data_fetcher(ScanState *ss, TsFdwScanState *fsstate, FetchMode mode);
#ifdef TS_DEBUG
extern TimestampTz ts_current_timestamp_override_value;
/* Allow tests to specify the time to push down in place of now() */
extern void fdw_scan_debug_override_pushdown_timestamp(TimestampTz time);
extern void fdw_scan_debug_override_current_timestamp(TimestampTz time);
#endif
#endif /* TIMESCALEDB_TSL_FDW_SCAN_EXEC_H */


@ -61,6 +61,7 @@ INSERT INTO hyper VALUES
('2018-05-19 13:01', 4, 4, 5.1),
('2018-05-20 15:08', 5, 1, 9.4),
('2018-05-30 13:02', 3, 2, 9.0);
-- Update table stats
ANALYZE hyper;
-- Optimizer debug messages shown at debug level 2
SET client_min_messages TO DEBUG2;


@ -208,37 +208,39 @@ SET enable_partitionwise_aggregate = ON;
FROM :TEST_TABLE
GROUP BY :GROUPING
ORDER BY :GROUPING;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (AsyncAppend)
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Output: location, (last(highlow, timec)), (first(highlow, timec))
-> Merge Append
Sort Key: conditions.location
-> GroupAggregate
Output: conditions.location, last(conditions.highlow, conditions.timec), first(conditions.highlow, conditions.timec)
Group Key: conditions.location
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.location, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, location, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST
-> GroupAggregate
Output: conditions_1.location, last(conditions_1.highlow, conditions_1.timec), first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.location, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, location, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST
-> GroupAggregate
Output: conditions_2.location, last(conditions_2.highlow, conditions_2.timec), first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.location, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, location, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST
(28 rows)
Sort Key: location
-> Custom Scan (AsyncAppend)
Output: location, (last(highlow, timec)), (first(highlow, timec))
-> Append
-> HashAggregate
Output: conditions.location, last(conditions.highlow, conditions.timec), first(conditions.highlow, conditions.timec)
Group Key: conditions.location
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.location, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, location, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> HashAggregate
Output: conditions_1.location, last(conditions_1.highlow, conditions_1.timec), first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.location, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, location, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> HashAggregate
Output: conditions_2.location, last(conditions_2.highlow, conditions_2.timec), first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.location, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, location, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
(30 rows)
-- Mix of aggregates that push down and those that don't
:PREFIX SELECT :GROUPING,
@ -256,37 +258,39 @@ SET enable_partitionwise_aggregate = ON;
FROM :TEST_TABLE
GROUP BY :GROUPING
ORDER BY :GROUPING;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (AsyncAppend)
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Output: location, (min(allnull)), (max(temperature)), ((sum(temperature) + sum(humidity))), (avg(humidity)), (round(stddev((humidity)::integer), 5)), (bit_and(bit_int)), (bit_or(bit_int)), (bool_and(good_life)), (every((temperature > '0'::double precision))), (bool_or(good_life)), (first(highlow, timec))
-> Merge Append
Sort Key: conditions.location
-> GroupAggregate
Output: conditions.location, min(conditions.allnull), max(conditions.temperature), (sum(conditions.temperature) + sum(conditions.humidity)), avg(conditions.humidity), round(stddev((conditions.humidity)::integer), 5), bit_and(conditions.bit_int), bit_or(conditions.bit_int), bool_and(conditions.good_life), every((conditions.temperature > '0'::double precision)), bool_or(conditions.good_life), first(conditions.highlow, conditions.timec)
Group Key: conditions.location
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.location, conditions.allnull, conditions.temperature, conditions.humidity, conditions.bit_int, conditions.good_life, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST
-> GroupAggregate
Output: conditions_1.location, min(conditions_1.allnull), max(conditions_1.temperature), (sum(conditions_1.temperature) + sum(conditions_1.humidity)), avg(conditions_1.humidity), round(stddev((conditions_1.humidity)::integer), 5), bit_and(conditions_1.bit_int), bit_or(conditions_1.bit_int), bool_and(conditions_1.good_life), every((conditions_1.temperature > '0'::double precision)), bool_or(conditions_1.good_life), first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.location, conditions_1.allnull, conditions_1.temperature, conditions_1.humidity, conditions_1.bit_int, conditions_1.good_life, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST
-> GroupAggregate
Output: conditions_2.location, min(conditions_2.allnull), max(conditions_2.temperature), (sum(conditions_2.temperature) + sum(conditions_2.humidity)), avg(conditions_2.humidity), round(stddev((conditions_2.humidity)::integer), 5), bit_and(conditions_2.bit_int), bit_or(conditions_2.bit_int), bool_and(conditions_2.good_life), every((conditions_2.temperature > '0'::double precision)), bool_or(conditions_2.good_life), first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.location, conditions_2.allnull, conditions_2.temperature, conditions_2.humidity, conditions_2.bit_int, conditions_2.good_life, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST
(28 rows)
Sort Key: location
-> Custom Scan (AsyncAppend)
Output: location, (min(allnull)), (max(temperature)), ((sum(temperature) + sum(humidity))), (avg(humidity)), (round(stddev((humidity)::integer), 5)), (bit_and(bit_int)), (bit_or(bit_int)), (bool_and(good_life)), (every((temperature > '0'::double precision))), (bool_or(good_life)), (first(highlow, timec))
-> Append
-> HashAggregate
Output: conditions.location, min(conditions.allnull), max(conditions.temperature), (sum(conditions.temperature) + sum(conditions.humidity)), avg(conditions.humidity), round(stddev((conditions.humidity)::integer), 5), bit_and(conditions.bit_int), bit_or(conditions.bit_int), bool_and(conditions.good_life), every((conditions.temperature > '0'::double precision)), bool_or(conditions.good_life), first(conditions.highlow, conditions.timec)
Group Key: conditions.location
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.location, conditions.allnull, conditions.temperature, conditions.humidity, conditions.bit_int, conditions.good_life, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> HashAggregate
Output: conditions_1.location, min(conditions_1.allnull), max(conditions_1.temperature), (sum(conditions_1.temperature) + sum(conditions_1.humidity)), avg(conditions_1.humidity), round(stddev((conditions_1.humidity)::integer), 5), bit_and(conditions_1.bit_int), bit_or(conditions_1.bit_int), bool_and(conditions_1.good_life), every((conditions_1.temperature > '0'::double precision)), bool_or(conditions_1.good_life), first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.location, conditions_1.allnull, conditions_1.temperature, conditions_1.humidity, conditions_1.bit_int, conditions_1.good_life, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> HashAggregate
Output: conditions_2.location, min(conditions_2.allnull), max(conditions_2.temperature), (sum(conditions_2.temperature) + sum(conditions_2.humidity)), avg(conditions_2.humidity), round(stddev((conditions_2.humidity)::integer), 5), bit_and(conditions_2.bit_int), bit_or(conditions_2.bit_int), bool_and(conditions_2.good_life), every((conditions_2.temperature > '0'::double precision)), bool_or(conditions_2.good_life), first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.location, conditions_2.allnull, conditions_2.temperature, conditions_2.humidity, conditions_2.bit_int, conditions_2.good_life, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
(30 rows)
\set GROUPING 'region'
\ir 'include/aggregate_queries.sql'
@ -370,40 +374,42 @@ SET enable_partitionwise_aggregate = ON;
FROM :TEST_TABLE
GROUP BY :GROUPING
ORDER BY :GROUPING;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate
Output: region, last(highlow, timec), first(highlow, timec)
Group Key: region
-> Custom Scan (AsyncAppend)
-> Sort
Output: region, (PARTIAL last(highlow, timec)), (PARTIAL first(highlow, timec))
-> Merge Append
Sort Key: conditions.region
-> Partial GroupAggregate
Output: conditions.region, PARTIAL last(conditions.highlow, conditions.timec), PARTIAL first(conditions.highlow, conditions.timec)
Group Key: conditions.region
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.region, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, region, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST
-> Partial GroupAggregate
Output: conditions_1.region, PARTIAL last(conditions_1.highlow, conditions_1.timec), PARTIAL first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.region, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, region, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST
-> Partial GroupAggregate
Output: conditions_2.region, PARTIAL last(conditions_2.highlow, conditions_2.timec), PARTIAL first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.region, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, region, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST
(31 rows)
Sort Key: region
-> Custom Scan (AsyncAppend)
Output: region, (PARTIAL last(highlow, timec)), (PARTIAL first(highlow, timec))
-> Append
-> Partial HashAggregate
Output: conditions.region, PARTIAL last(conditions.highlow, conditions.timec), PARTIAL first(conditions.highlow, conditions.timec)
Group Key: conditions.region
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.region, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, region, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> Partial HashAggregate
Output: conditions_1.region, PARTIAL last(conditions_1.highlow, conditions_1.timec), PARTIAL first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.region, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, region, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> Partial HashAggregate
Output: conditions_2.region, PARTIAL last(conditions_2.highlow, conditions_2.timec), PARTIAL first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.region, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, region, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
(33 rows)
-- Mix of aggregates that push down and those that don't
:PREFIX SELECT :GROUPING,
@ -421,40 +427,42 @@ SET enable_partitionwise_aggregate = ON;
FROM :TEST_TABLE
GROUP BY :GROUPING
ORDER BY :GROUPING;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate
Output: region, min(allnull), max(temperature), (sum(temperature) + sum(humidity)), avg(humidity), round(stddev((humidity)::integer), 5), bit_and(bit_int), bit_or(bit_int), bool_and(good_life), every((temperature > '0'::double precision)), bool_or(good_life), first(highlow, timec)
Group Key: region
-> Custom Scan (AsyncAppend)
-> Sort
Output: region, (PARTIAL min(allnull)), (PARTIAL max(temperature)), (PARTIAL sum(temperature)), (PARTIAL sum(humidity)), (PARTIAL avg(humidity)), (PARTIAL stddev((humidity)::integer)), (PARTIAL bit_and(bit_int)), (PARTIAL bit_or(bit_int)), (PARTIAL bool_and(good_life)), (PARTIAL every((temperature > '0'::double precision))), (PARTIAL bool_or(good_life)), (PARTIAL first(highlow, timec))
-> Merge Append
Sort Key: conditions.region
-> Partial GroupAggregate
Output: conditions.region, PARTIAL min(conditions.allnull), PARTIAL max(conditions.temperature), PARTIAL sum(conditions.temperature), PARTIAL sum(conditions.humidity), PARTIAL avg(conditions.humidity), PARTIAL stddev((conditions.humidity)::integer), PARTIAL bit_and(conditions.bit_int), PARTIAL bit_or(conditions.bit_int), PARTIAL bool_and(conditions.good_life), PARTIAL every((conditions.temperature > '0'::double precision)), PARTIAL bool_or(conditions.good_life), PARTIAL first(conditions.highlow, conditions.timec)
Group Key: conditions.region
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.region, conditions.allnull, conditions.temperature, conditions.humidity, conditions.bit_int, conditions.good_life, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST
-> Partial GroupAggregate
Output: conditions_1.region, PARTIAL min(conditions_1.allnull), PARTIAL max(conditions_1.temperature), PARTIAL sum(conditions_1.temperature), PARTIAL sum(conditions_1.humidity), PARTIAL avg(conditions_1.humidity), PARTIAL stddev((conditions_1.humidity)::integer), PARTIAL bit_and(conditions_1.bit_int), PARTIAL bit_or(conditions_1.bit_int), PARTIAL bool_and(conditions_1.good_life), PARTIAL every((conditions_1.temperature > '0'::double precision)), PARTIAL bool_or(conditions_1.good_life), PARTIAL first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.region, conditions_1.allnull, conditions_1.temperature, conditions_1.humidity, conditions_1.bit_int, conditions_1.good_life, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST
-> Partial GroupAggregate
Output: conditions_2.region, PARTIAL min(conditions_2.allnull), PARTIAL max(conditions_2.temperature), PARTIAL sum(conditions_2.temperature), PARTIAL sum(conditions_2.humidity), PARTIAL avg(conditions_2.humidity), PARTIAL stddev((conditions_2.humidity)::integer), PARTIAL bit_and(conditions_2.bit_int), PARTIAL bit_or(conditions_2.bit_int), PARTIAL bool_and(conditions_2.good_life), PARTIAL every((conditions_2.temperature > '0'::double precision)), PARTIAL bool_or(conditions_2.good_life), PARTIAL first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.region, conditions_2.allnull, conditions_2.temperature, conditions_2.humidity, conditions_2.bit_int, conditions_2.good_life, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST
(31 rows)
Sort Key: region
-> Custom Scan (AsyncAppend)
Output: region, (PARTIAL min(allnull)), (PARTIAL max(temperature)), (PARTIAL sum(temperature)), (PARTIAL sum(humidity)), (PARTIAL avg(humidity)), (PARTIAL stddev((humidity)::integer)), (PARTIAL bit_and(bit_int)), (PARTIAL bit_or(bit_int)), (PARTIAL bool_and(good_life)), (PARTIAL every((temperature > '0'::double precision))), (PARTIAL bool_or(good_life)), (PARTIAL first(highlow, timec))
-> Append
-> Partial HashAggregate
Output: conditions.region, PARTIAL min(conditions.allnull), PARTIAL max(conditions.temperature), PARTIAL sum(conditions.temperature), PARTIAL sum(conditions.humidity), PARTIAL avg(conditions.humidity), PARTIAL stddev((conditions.humidity)::integer), PARTIAL bit_and(conditions.bit_int), PARTIAL bit_or(conditions.bit_int), PARTIAL bool_and(conditions.good_life), PARTIAL every((conditions.temperature > '0'::double precision)), PARTIAL bool_or(conditions.good_life), PARTIAL first(conditions.highlow, conditions.timec)
Group Key: conditions.region
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.region, conditions.allnull, conditions.temperature, conditions.humidity, conditions.bit_int, conditions.good_life, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> Partial HashAggregate
Output: conditions_1.region, PARTIAL min(conditions_1.allnull), PARTIAL max(conditions_1.temperature), PARTIAL sum(conditions_1.temperature), PARTIAL sum(conditions_1.humidity), PARTIAL avg(conditions_1.humidity), PARTIAL stddev((conditions_1.humidity)::integer), PARTIAL bit_and(conditions_1.bit_int), PARTIAL bit_or(conditions_1.bit_int), PARTIAL bool_and(conditions_1.good_life), PARTIAL every((conditions_1.temperature > '0'::double precision)), PARTIAL bool_or(conditions_1.good_life), PARTIAL first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.region, conditions_1.allnull, conditions_1.temperature, conditions_1.humidity, conditions_1.bit_int, conditions_1.good_life, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> Partial HashAggregate
Output: conditions_2.region, PARTIAL min(conditions_2.allnull), PARTIAL max(conditions_2.temperature), PARTIAL sum(conditions_2.temperature), PARTIAL sum(conditions_2.humidity), PARTIAL avg(conditions_2.humidity), PARTIAL stddev((conditions_2.humidity)::integer), PARTIAL bit_and(conditions_2.bit_int), PARTIAL bit_or(conditions_2.bit_int), PARTIAL bool_and(conditions_2.good_life), PARTIAL every((conditions_2.temperature > '0'::double precision)), PARTIAL bool_or(conditions_2.good_life), PARTIAL first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.region, conditions_2.allnull, conditions_2.temperature, conditions_2.humidity, conditions_2.bit_int, conditions_2.good_life, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
(33 rows)
-- Full aggregate pushdown correctness check, compare location grouped query results with partionwise aggregates on and off
\set GROUPING 'location'
@ -480,11 +488,3 @@ SELECT format('\! diff %s %s', :'RESULTS_CONTROL2', :'RESULTS_TEST2') as "DIFF_C
-- multiple values for "col" that has the same timestamp, so the
-- output depends on the order of arriving tuples.
:DIFF_CMD2
14c14
< west | (1,2) | (1,2)
---
> west | (1,2) |
22c22
< west | | 85 | 993674 | 67.5 | 9.68309 | 0 | 10 | t | t | t | (1,2)
---
> west | | 85 | 993674 | 67.5 | 9.68309 | 0 | 10 | t | t | t |


@ -3,20 +3,10 @@
-- LICENSE-TIMESCALE for a copy of the license.
-- Need to be super user to create extension and add data nodes
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER;
\ir include/remote_exec.sql
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
CREATE SCHEMA IF NOT EXISTS test;
psql:include/remote_exec.sql:5: NOTICE: schema "test" already exists, skipping
GRANT USAGE ON SCHEMA test TO PUBLIC;
CREATE OR REPLACE FUNCTION test.remote_exec(srv_name name[], command text)
RETURNS VOID
AS :TSL_MODULE_PATHNAME, 'ts_remote_exec'
LANGUAGE C;
-- Support for execute_sql_and_filter_data_node_name_on_error()
\unset ECHO
psql:include/filter_exec.sql:5: NOTICE: schema "test" already exists, skipping
psql:include/remote_exec.sql:5: NOTICE: schema "test" already exists, skipping
psql:utils/testsupport.sql:8: NOTICE: schema "test" already exists, skipping
-- Cleanup from other potential tests that created these databases
SET client_min_messages TO ERROR;
DROP DATABASE IF EXISTS data_node_1;
@ -2358,27 +2348,27 @@ UPDATE disttable_replicated SET device = 2 WHERE device = (SELECT device FROM de
-> Foreign Scan on _timescaledb_internal._hyper_6_11_dist_chunk _hyper_6_11_dist_chunk_1 (actual rows=2 loops=1)
Output: _hyper_6_11_dist_chunk_1.device
Data node: data_node_1
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_11_dist_chunk ORDER BY device ASC NULLS LAST
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_11_dist_chunk
-> Foreign Scan on _timescaledb_internal._hyper_6_12_dist_chunk _hyper_6_12_dist_chunk_1 (actual rows=1 loops=1)
Output: _hyper_6_12_dist_chunk_1.device
Data node: data_node_2
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_12_dist_chunk ORDER BY device ASC NULLS LAST
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_12_dist_chunk
-> Foreign Scan on _timescaledb_internal._hyper_6_13_dist_chunk _hyper_6_13_dist_chunk_1 (actual rows=1 loops=1)
Output: _hyper_6_13_dist_chunk_1.device
Data node: data_node_3
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_13_dist_chunk ORDER BY device ASC NULLS LAST
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_13_dist_chunk
-> Foreign Scan on _timescaledb_internal._hyper_6_14_dist_chunk _hyper_6_14_dist_chunk_1 (actual rows=2 loops=1)
Output: _hyper_6_14_dist_chunk_1.device
Data node: data_node_1
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_14_dist_chunk ORDER BY device ASC NULLS LAST
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_14_dist_chunk
-> Foreign Scan on _timescaledb_internal._hyper_6_15_dist_chunk _hyper_6_15_dist_chunk_1 (actual rows=1 loops=1)
Output: _hyper_6_15_dist_chunk_1.device
Data node: data_node_2
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_15_dist_chunk ORDER BY device ASC NULLS LAST
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_15_dist_chunk
-> Foreign Scan on _timescaledb_internal._hyper_6_16_dist_chunk _hyper_6_16_dist_chunk_1 (actual rows=1 loops=1)
Output: _hyper_6_16_dist_chunk_1.device
Data node: data_node_3
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_16_dist_chunk ORDER BY device ASC NULLS LAST
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_16_dist_chunk
InitPlan 2 (returns $1)
-> Limit (actual rows=1 loops=1)
Output: devices.device
@ -3295,12 +3285,12 @@ INSERT INTO dist_device VALUES
('2017-01-01 06:01', 1, 1.1),
('2017-01-01 09:11', 3, 2.1),
('2017-01-01 08:01', 1, 1.2);
EXPLAIN VERBOSE
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM dist_device;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------
Append (cost=100.00..175.14 rows=1861 width=20)
-> Custom Scan (DataNodeScan) on public.dist_device (cost=100.00..165.83 rows=1861 width=20)
Append
-> Custom Scan (DataNodeScan) on public.dist_device
Output: dist_device."time", dist_device.dist_device, dist_device.temp
Data node: data_node_1
Chunks: _hyper_15_36_dist_chunk
@ -3315,3 +3305,89 @@ SELECT * FROM dist_device;
Sun Jan 01 08:01:00 2017 PST | 1 | 1.2
(3 rows)
-- Test estimating relation size without stats
CREATE TABLE hyper_estimate(time timestamptz, device int, temp float);
SELECT * FROM create_distributed_hypertable('hyper_estimate', 'time', 'device', number_partitions => 3, replication_factor => 1, chunk_time_interval => INTERVAL '7 days');
NOTICE: adding not-null constraint to column "time"
hypertable_id | schema_name | table_name | created
---------------+-------------+----------------+---------
16 | public | hyper_estimate | t
(1 row)
-- This will enable us to more easily see estimates per chunk
SET timescaledb.enable_per_data_node_queries = false;
-- Estimating chunk progress uses current timestamp so we override it for test purposes
SELECT ts_test_override_current_timestamptz('2019-11-11 00:00'::timestamptz);
ts_test_override_current_timestamptz
--------------------------------------
(1 row)
-- Test estimates when backfilling. 3 chunks should be historical and 3 should be considered current when estimating.
-- Note that the estimated numbers are way off since we are using the shared buffer size as the starting point. This will not be
-- an issue in a production-like environment since the chunk size should be similar to the shared buffer size.
INSERT INTO hyper_estimate VALUES
('2017-01-01 06:01', 1, 1.1),
('2017-01-01 09:11', 1, 2.1),
('2017-01-01 08:01', 1, 1.2),
('2017-01-02 08:01', 1, 1.3),
('2017-01-02 08:01', 2, 1.6),
('2017-01-02 06:01', 2, 1.4),
('2017-01-03 01:01', 3, 2),
('2017-01-03 01:16', 3, 3),
('2017-01-03 01:17', 3, 4),
('2018-01-13 01:01', 1, 2),
('2018-01-13 01:10', 1, 0.4),
('2018-01-13 02:10', 2, 1.4),
('2018-01-13 05:01', 2, 2),
('2018-01-13 05:50', 2, 4),
('2018-01-13 16:01', 3, 2);
-- Since there are no stats we use shared buffers size to estimate number of rows
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
QUERY PLAN
---------------------------------------------------------------------------------------------
Append (cost=100.00..166847.40 rows=4118040 width=20)
-> Foreign Scan on _hyper_16_37_dist_chunk (cost=100.00..32468.60 rows=915120 width=20)
-> Foreign Scan on _hyper_16_38_dist_chunk (cost=100.00..32468.60 rows=915120 width=20)
-> Foreign Scan on _hyper_16_39_dist_chunk (cost=100.00..32468.60 rows=915120 width=20)
-> Foreign Scan on _hyper_16_40_dist_chunk (cost=100.00..16283.80 rows=457560 width=20)
-> Foreign Scan on _hyper_16_41_dist_chunk (cost=100.00..16283.80 rows=457560 width=20)
-> Foreign Scan on _hyper_16_42_dist_chunk (cost=100.00..16283.80 rows=457560 width=20)
(7 rows)
-- This will calculate the stats
ANALYZE hyper_estimate;
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
QUERY PLAN
--------------------------------------------------------------------------------------
Append (cost=100.00..606.52 rows=15 width=20)
-> Foreign Scan on _hyper_16_37_dist_chunk (cost=100.00..101.12 rows=4 width=20)
-> Foreign Scan on _hyper_16_38_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_39_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_40_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_41_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_42_dist_chunk (cost=100.00..101.03 rows=1 width=20)
(7 rows)
-- Let's insert data into a new chunk. This will result in chunk creation.
INSERT INTO hyper_estimate VALUES ('2019-11-11 06:01', 1, 1.1);
-- We have stats for previous chunks so we can interpolate number of rows for the new chunk
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
QUERY PLAN
--------------------------------------------------------------------------------------
Append (cost=100.00..706.58 rows=17 width=20)
-> Foreign Scan on _hyper_16_37_dist_chunk (cost=100.00..101.12 rows=4 width=20)
-> Foreign Scan on _hyper_16_38_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_39_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_40_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_41_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_42_dist_chunk (cost=100.00..101.03 rows=1 width=20)
-> Foreign Scan on _hyper_16_43_dist_chunk (cost=100.00..100.05 rows=2 width=20)
(8 rows)


@ -3,20 +3,10 @@
-- LICENSE-TIMESCALE for a copy of the license.
-- Need to be super user to create extension and add data nodes
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER;
\ir include/remote_exec.sql
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
CREATE SCHEMA IF NOT EXISTS test;
psql:include/remote_exec.sql:5: NOTICE: schema "test" already exists, skipping
GRANT USAGE ON SCHEMA test TO PUBLIC;
CREATE OR REPLACE FUNCTION test.remote_exec(srv_name name[], command text)
RETURNS VOID
AS :TSL_MODULE_PATHNAME, 'ts_remote_exec'
LANGUAGE C;
-- Support for execute_sql_and_filter_data_node_name_on_error()
\unset ECHO
psql:include/filter_exec.sql:5: NOTICE: schema "test" already exists, skipping
psql:include/remote_exec.sql:5: NOTICE: schema "test" already exists, skipping
psql:utils/testsupport.sql:8: NOTICE: schema "test" already exists, skipping
-- Cleanup from other potential tests that created these databases
SET client_min_messages TO ERROR;
DROP DATABASE IF EXISTS data_node_1;
@ -3289,11 +3279,11 @@ INSERT INTO dist_device VALUES
('2017-01-01 06:01', 1, 1.1),
('2017-01-01 09:11', 3, 2.1),
('2017-01-01 08:01', 1, 1.2);
EXPLAIN VERBOSE
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM dist_device;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (DataNodeScan) on public.dist_device (cost=100.00..165.83 rows=1861 width=20)
Custom Scan (DataNodeScan) on public.dist_device
Output: dist_device."time", dist_device.dist_device, dist_device.temp
Data node: data_node_1
Chunks: _hyper_15_36_dist_chunk
@ -3308,3 +3298,89 @@ SELECT * FROM dist_device;
Sun Jan 01 08:01:00 2017 PST | 1 | 1.2
(3 rows)
-- Test estimating relation size without stats
CREATE TABLE hyper_estimate(time timestamptz, device int, temp float);
SELECT * FROM create_distributed_hypertable('hyper_estimate', 'time', 'device', number_partitions => 3, replication_factor => 1, chunk_time_interval => INTERVAL '7 days');
NOTICE: adding not-null constraint to column "time"
hypertable_id | schema_name | table_name | created
---------------+-------------+----------------+---------
16 | public | hyper_estimate | t
(1 row)
-- This will enable us to more easily see estimates per chunk
SET timescaledb.enable_per_data_node_queries = false;
-- Estimating chunk progress uses current timestamp so we override it for test purposes
SELECT ts_test_override_current_timestamptz('2019-11-11 00:00'::timestamptz);
ts_test_override_current_timestamptz
--------------------------------------
(1 row)
-- Test estimates when backfilling. 3 chunks should be historical and 3 should be considered current when estimating.
-- Note that the estimated numbers are way off since we are using the shared buffer size as the starting point. This will not be
-- an issue in a production-like environment since the chunk size should be similar to the shared buffer size.
INSERT INTO hyper_estimate VALUES
('2017-01-01 06:01', 1, 1.1),
('2017-01-01 09:11', 1, 2.1),
('2017-01-01 08:01', 1, 1.2),
('2017-01-02 08:01', 1, 1.3),
('2017-01-02 08:01', 2, 1.6),
('2017-01-02 06:01', 2, 1.4),
('2017-01-03 01:01', 3, 2),
('2017-01-03 01:16', 3, 3),
('2017-01-03 01:17', 3, 4),
('2018-01-13 01:01', 1, 2),
('2018-01-13 01:10', 1, 0.4),
('2018-01-13 02:10', 2, 1.4),
('2018-01-13 05:01', 2, 2),
('2018-01-13 05:50', 2, 4),
('2018-01-13 16:01', 3, 2);
-- Since there are no stats we use shared buffers size to estimate number of rows
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
QUERY PLAN
---------------------------------------------------------------------------------------------
Append (cost=100.00..166847.40 rows=4118040 width=20)
-> Foreign Scan on _hyper_16_37_dist_chunk (cost=100.00..32468.60 rows=915120 width=20)
-> Foreign Scan on _hyper_16_38_dist_chunk (cost=100.00..32468.60 rows=915120 width=20)
-> Foreign Scan on _hyper_16_39_dist_chunk (cost=100.00..32468.60 rows=915120 width=20)
-> Foreign Scan on _hyper_16_40_dist_chunk (cost=100.00..16283.80 rows=457560 width=20)
-> Foreign Scan on _hyper_16_41_dist_chunk (cost=100.00..16283.80 rows=457560 width=20)
-> Foreign Scan on _hyper_16_42_dist_chunk (cost=100.00..16283.80 rows=457560 width=20)
(7 rows)
-- This will calculate the stats
ANALYZE hyper_estimate;
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
QUERY PLAN
--------------------------------------------------------------------------------------
Append (cost=100.00..606.52 rows=15 width=20)
-> Foreign Scan on _hyper_16_37_dist_chunk (cost=100.00..101.12 rows=4 width=20)
-> Foreign Scan on _hyper_16_38_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_39_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_40_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_41_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_42_dist_chunk (cost=100.00..101.03 rows=1 width=20)
(7 rows)
-- Let's insert data into a new chunk. This will result in chunk creation.
INSERT INTO hyper_estimate VALUES ('2019-11-11 06:01', 1, 1.1);
-- We have stats for previous chunks so we can interpolate number of rows for the new chunk
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
QUERY PLAN
--------------------------------------------------------------------------------------
Append (cost=100.00..706.58 rows=17 width=20)
-> Foreign Scan on _hyper_16_37_dist_chunk (cost=100.00..101.12 rows=4 width=20)
-> Foreign Scan on _hyper_16_38_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_39_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_40_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_41_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_42_dist_chunk (cost=100.00..101.03 rows=1 width=20)
-> Foreign Scan on _hyper_16_43_dist_chunk (cost=100.00..100.05 rows=2 width=20)
(8 rows)

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -48,6 +48,7 @@ INSERT INTO hyper VALUES
('2018-05-20 15:08', 5, 1, 9.4),
('2018-05-30 13:02', 3, 2, 9.0);
-- Update table stats
ANALYZE hyper;
-- Optimizer debug messages shown at debug level 2


@ -4,12 +4,12 @@
-- Need to be super user to create extension and add data nodes
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER;
\ir include/remote_exec.sql
-- Support for execute_sql_and_filter_data_node_name_on_error()
\unset ECHO
\o /dev/null
\ir include/filter_exec.sql
\ir include/remote_exec.sql
\ir utils/testsupport.sql
\o
\set ECHO all
@ -950,7 +950,56 @@ INSERT INTO dist_device VALUES
('2017-01-01 09:11', 3, 2.1),
('2017-01-01 08:01', 1, 1.2);
EXPLAIN VERBOSE
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM dist_device;
SELECT * FROM dist_device;
-- Test estimating relation size without stats
CREATE TABLE hyper_estimate(time timestamptz, device int, temp float);
SELECT * FROM create_distributed_hypertable('hyper_estimate', 'time', 'device', number_partitions => 3, replication_factor => 1, chunk_time_interval => INTERVAL '7 days');
-- This will enable us to more easily see estimates per chunk
SET timescaledb.enable_per_data_node_queries = false;
-- Estimating chunk progress uses current timestamp so we override it for test purposes
SELECT ts_test_override_current_timestamptz('2019-11-11 00:00'::timestamptz);
-- Test estimates when backfilling. 3 chunks should be historical and 3 should be considered current when estimating.
-- Note that the estimated numbers are way off since we are using the shared buffer size as the starting point. This will not be
-- an issue in a production-like environment since the chunk size should be similar to the shared buffer size.
INSERT INTO hyper_estimate VALUES
('2017-01-01 06:01', 1, 1.1),
('2017-01-01 09:11', 1, 2.1),
('2017-01-01 08:01', 1, 1.2),
('2017-01-02 08:01', 1, 1.3),
('2017-01-02 08:01', 2, 1.6),
('2017-01-02 06:01', 2, 1.4),
('2017-01-03 01:01', 3, 2),
('2017-01-03 01:16', 3, 3),
('2017-01-03 01:17', 3, 4),
('2018-01-13 01:01', 1, 2),
('2018-01-13 01:10', 1, 0.4),
('2018-01-13 02:10', 2, 1.4),
('2018-01-13 05:01', 2, 2),
('2018-01-13 05:50', 2, 4),
('2018-01-13 16:01', 3, 2);
-- Since there are no stats we use shared buffers size to estimate number of rows
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
-- This will calculate the stats
ANALYZE hyper_estimate;
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
-- Let's insert data into a new chunk. This will result in chunk creation.
INSERT INTO hyper_estimate VALUES ('2019-11-11 06:01', 1, 1.1);
-- We have stats for previous chunks so we can interpolate number of rows for the new chunk
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;


@ -5,10 +5,7 @@
-- Need to be super user to create extension and add data nodes
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER;
\ir include/remote_exec.sql
CREATE OR REPLACE FUNCTION ts_test_override_pushdown_timestamptz(new_value TIMESTAMPTZ) RETURNS VOID
AS :TSL_MODULE_PATHNAME, 'ts_test_override_pushdown_timestamptz'
LANGUAGE C VOLATILE STRICT;
\ir utils/testsupport.sql
-- Cleanup from other potential tests that created these databases
SET client_min_messages TO ERROR;
@ -658,7 +655,7 @@ GROUP BY 1, 2
LIMIT 1;
-- contains whitelisted time expressions
SELECT ts_test_override_pushdown_timestamptz('2018-06-01 00:00'::timestamptz);
SELECT ts_test_override_current_timestamptz('2018-06-01 00:00'::timestamptz);
EXPLAIN (VERBOSE, COSTS OFF)
SELECT time, device, avg(temp)
@ -685,7 +682,7 @@ LIMIT 1;
EXPLAIN (VERBOSE, COSTS OFF)
EXECUTE timestamp_pushdown_test;
SELECT ts_test_override_pushdown_timestamptz('2019-10-15 00:00'::timestamptz);
SELECT ts_test_override_current_timestamptz('2019-10-15 00:00'::timestamptz);
EXPLAIN (VERBOSE, COSTS OFF)
EXECUTE timestamp_pushdown_test;


@ -8,13 +8,13 @@
#include "export.h"
#include "fdw/scan_exec.h"
TS_FUNCTION_INFO_V1(ts_test_override_pushdown_timestamptz);
TS_FUNCTION_INFO_V1(ts_test_override_current_timestamptz);
Datum
ts_test_override_pushdown_timestamptz(PG_FUNCTION_ARGS)
ts_test_override_current_timestamptz(PG_FUNCTION_ARGS)
{
#ifdef TS_DEBUG
fdw_scan_debug_override_pushdown_timestamp(PG_GETARG_INT64(0));
fdw_scan_debug_override_current_timestamp(PG_GETARG_INT64(0));
PG_RETURN_VOID();
#else
elog(ERROR, "unable to handle ts_test_override_current_timestamptz without TS_DEBUG flag set");