Disable gapfill on distributed hypertable

There are a number of issues when time_bucket_gapfill is run on a
distributed hypertable. Thus a not-supported error is returned in this
case until the issues are fixed.
This commit is contained in:
Ruslan Fomkin 2020-09-03 10:44:58 +02:00 committed by Ruslan Fomkin
parent d5a6a5b193
commit 1fa072acb4
6 changed files with 101 additions and 6 deletions

View File

@ -486,6 +486,11 @@ data_node_scan_add_node_paths(PlannerInfo *root, RelOptInfo *hyper_rel)
ts_cache_release(hcache);
}
/*
* Creates CustomScanPath for the data node and adds to output_rel. No custom_path is added,
* i.e., it is encapsulated by the CustomScanPath, so it doesn't inflate continuation of the
* planning and will be planned locally on the data node.
*/
void
data_node_scan_create_upper_paths(PlannerInfo *root, UpperRelationKind stage, RelOptInfo *input_rel,
RelOptInfo *output_rel, void *extra)

View File

@ -396,10 +396,12 @@ gapfill_path_create(PlannerInfo *root, Path *subpath, FuncExpr *func)
}
/*
* Prepend GapFill node to every group_rel path
* Prepend GapFill node to every group_rel path.
* The implementation assumes that TimescaleDB planning hook is called only once
* per grouping.
*/
void
plan_add_gapfill(PlannerInfo *root, RelOptInfo *group_rel)
plan_add_gapfill(PlannerInfo *root, RelOptInfo *group_rel, bool dist_ht)
{
ListCell *lc;
Query *parse = root->parse;
@ -409,7 +411,10 @@ plan_add_gapfill(PlannerInfo *root, RelOptInfo *group_rel)
return;
/*
* Look for a time_bucket_gapfill function call in the target list. This
* lookup will succeed on every call to plan_add_gapfill, so it will lead
* to an incorrect query plan if plan_add_gapfill is called more than once
* per grouping.
*/
gapfill_function_walker((Node *) parse->targetList, &context);
@ -421,6 +426,17 @@ plan_add_gapfill(PlannerInfo *root, RelOptInfo *group_rel)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("multiple time_bucket_gapfill calls not allowed")));
/* Executing time_bucket_gapfill on a distributed hypertable produces incorrect
 * results, returns an internal planning error, or crashes if the grouping
 * includes a space dimension. Thus time_bucket_gapfill is temporarily disabled
 * until it is fixed.
 */
if (dist_ht)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("time_bucket_gapfill not implemented for distributed hypertable"),
errdetail("Current version doesn't implement support for time_bucket_gapfill on "
"distributed hypertables.")));
if (context.count == 1)
{
List *copy = group_rel->pathlist;

View File

@ -8,7 +8,7 @@
#include <postgres.h>
void plan_add_gapfill(PlannerInfo *, RelOptInfo *);
void plan_add_gapfill(PlannerInfo *root, RelOptInfo *group_rel, bool dist_ht);
void gapfill_adjust_window_targetlist(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *output_rel);

View File

@ -49,11 +49,13 @@ tsl_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, RelOptIn
RelOptInfo *output_rel, TsRelType input_reltype, Hypertable *ht,
void *extra)
{
bool dist_ht = false;
switch (input_reltype)
{
case TS_REL_HYPERTABLE:
case TS_REL_HYPERTABLE_CHILD:
if (hypertable_is_distributed(ht))
dist_ht = hypertable_is_distributed(ht);
if (dist_ht)
data_node_scan_create_upper_paths(root, stage, input_rel, output_rel, extra);
break;
default:
@ -61,7 +63,7 @@ tsl_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, RelOptIn
}
if (UPPERREL_GROUP_AGG == stage)
plan_add_gapfill(root, output_rel);
plan_add_gapfill(root, output_rel, dist_ht);
else if (UPPERREL_WINDOW == stage && IsA(linitial(input_rel->pathlist), CustomPath))
gapfill_adjust_window_targetlist(root, input_rel, output_rel);
else if (ts_guc_enable_async_append && UPPERREL_FINAL == stage &&

View File

@ -216,6 +216,44 @@ _timescaledb_internal._dist_hyper_1_8_chunk
(1 row)
-- Simple test of time_bucket_gapfill, which is disabled for now.
\set ON_ERROR_STOP 0
SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-02 18:00'),
first(value, time),
avg(value)
FROM disttable
GROUP BY 1;
ERROR: time_bucket_gapfill not implemented for distributed hypertable
SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'),
device,
first(value, time),
avg(value)
FROM disttable
GROUP BY 1,2;
ERROR: time_bucket_gapfill not implemented for distributed hypertable
SELECT
time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'),
lag(min(time)) OVER ()
FROM disttable
GROUP BY 1;
ERROR: time_bucket_gapfill not implemented for distributed hypertable
-- Test the same queries with enabled partitionwise aggregate
SET enable_partitionwise_aggregate = 'on';
SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-02 18:00'),
first(value, time),
avg(value)
FROM disttable
GROUP BY 1;
ERROR: time_bucket_gapfill not implemented for distributed hypertable
SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'),
device,
first(value, time),
avg(value)
FROM disttable
GROUP BY 1,2;
ERROR: time_bucket_gapfill not implemented for distributed hypertable
SET enable_partitionwise_aggregate = 'off';
\set ON_ERROR_STOP 1
-- Ensure that move_chunk() and reorder_chunk() functions cannot be used
-- with distributed hypertable
SET ROLE TO DEFAULT;

View File

@ -67,6 +67,40 @@ SELECT * FROM disttable ORDER BY time;
SELECT * FROM show_chunks('disttable');
SELECT * FROM test.remote_exec(NULL, $$ SELECT show_chunks('disttable'); $$);
-- Simple test of time_bucket_gapfill, which is disabled for now.
\set ON_ERROR_STOP 0
SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-02 18:00'),
first(value, time),
avg(value)
FROM disttable
GROUP BY 1;
SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'),
device,
first(value, time),
avg(value)
FROM disttable
GROUP BY 1,2;
SELECT
time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'),
lag(min(time)) OVER ()
FROM disttable
GROUP BY 1;
-- Test the same queries with enabled partitionwise aggregate
SET enable_partitionwise_aggregate = 'on';
SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-02 18:00'),
first(value, time),
avg(value)
FROM disttable
GROUP BY 1;
SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'),
device,
first(value, time),
avg(value)
FROM disttable
GROUP BY 1,2;
SET enable_partitionwise_aggregate = 'off';
\set ON_ERROR_STOP 1
-- Ensure that move_chunk() and reorder_chunk() functions cannot be used
-- with distributed hypertable
SET ROLE TO DEFAULT;